From 42e18e9a21f76d1b886d52ce9ad614e7b5a7be91 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 8 Jul 2022 22:00:50 +0000 Subject: [PATCH 001/138] CoreMark makefile and printing improvements --- benchmarks/coremark/Makefile | 4 ++-- benchmarks/coremark/riscv64-baremetal/core_portme.c | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 6ad33eece..eebbfab5e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -13,7 +13,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=rv$(XLEN)im -static -fal -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=2 # flags that cause build errors mcmodel=medlow @@ -28,7 +28,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=rv$(XLEN)im -static -fal all: $(work_dir)/coremark.bare.riscv.elf.memfile run: - (cd ../../pipelined/regression && (vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log)) + (cd ../../pipelined/regression && (time vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log)) cd ../../benchmarks/coremark/ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.c b/benchmarks/coremark/riscv64-baremetal/core_portme.c index d63f08d19..57b7993ad 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.c +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c @@ -196,10 +196,13 @@ void stop_time(void) { CORE_TICKS get_time(void) { CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); unsigned long instructions = minstretDiff(); - ee_printf(" Called get_time\n"); + long long cm100 = 1000000000 / elapsed; // coremark score * 100 + long long cpi100 = elapsed*100/instructions; // CPI * 100 + ee_printf(" WALLY CoreMark Results (from get_time)\n"); ee_printf(" Elapsed MTIME: %u\n", elapsed); ee_printf(" Elapsed MINSTRET: %lu\n", instructions); - ee_printf(" CPI: %lu / %lu\n", elapsed, instructions); + ee_printf(" COREMARK/MHz Score: 10,000,000 / %lu = %d.%02d \n", elapsed, cm100/100, cm100%100); + ee_printf(" CPI: %lu / %lu = %d.%02d\n", elapsed, instructions, cpi100/100, cpi100%100); return elapsed; } /* Function: time_in_secs From 97e7e619d90d63730e0d8a75e345d3ff98e76617 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 8 Jul 2022 23:56:57 +0000 Subject: [PATCH 002/138] moved fpu ieu write data mux to lsu --- pipelined/regression/sim-wally-batch | 2 +- pipelined/src/cache/cache.sv | 14 ++++---------- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 17 +++++++++++------ pipelined/testbench/testbench.sv | 1 + pipelined/testbench/tests.vh | 2 +- tests/riscof/Makefile | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch index 91f116976..7e821e584 100755 --- a/pipelined/regression/sim-wally-batch +++ b/pipelined/regression/sim-wally-batch @@ -1 +1 @@ -vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f" +vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d" diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index f6aad78e7..b80df13ae 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -42,10 +42,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, - input logic [`XLEN-1:0] FinalWriteData, - input logic [`FLEN-1:0] FWriteDataM, + input logic [WORDLEN-1:0] FinalWriteData, input logic FLoad2, - input logic FpLoadStoreM, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu @@ -72,7 +70,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER localparam SETLEN = $clog2(NUMLINES); localparam SETTOP = SETLEN+OFFSETLEN; localparam TAGLEN = `PA_BITS - SETTOP; - localparam WORDSPERLINE = LINELEN/`XLEN; + localparam WORDSPERLINE = LINELEN/WORDLEN; localparam FlushAdrThreshold = NUMLINES - 1; logic SelAdr; @@ -162,12 +160,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - if (`LLEN>`XLEN) - mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData)); - else - mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}), diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 0c43c736d..c96396ab9 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -226,7 +226,7 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(), + .CacheFetchLine(ICacheFetchLine), .FLoad2(), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 4b200f706..a63f813ee 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -192,7 +192,8 @@ module lsu ( // Memory System // Either Data Cache or Data Tightly Integrated Memory or just bus interface ///////////////////////////////////////////////////////////////////////////////////////////// - logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM; + logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM; + logic [`LLEN-1:0] FinalWriteDataM; logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; logic [`LLEN-1:0] ReadDataWordMuxM; logic IgnoreRequest; @@ -202,7 +203,7 @@ module lsu ( if (`DMEM == `MEM_TIM) begin : dtim // *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW. // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops - dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, + dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, .DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM), .DCacheMiss, .DCacheAccess); @@ -230,15 +231,19 @@ module lsu ( mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM), + mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); if(CACHE_ENABLED) begin : dcache + if (`LLEN>`FLEN) + mux2 #(`LLEN) datamux({(`LLEN-`XLEN)'(0), IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + else + assign FinalWriteDataM[`XLEN-1:0] = IEUWriteDataM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2, + .ByteMask(ByteMaskM), .WordCount, .FLoad2, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), @@ -286,10 +291,10 @@ module lsu ( // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// if (`BIGENDIAN_SUPPORTED) begin:endian - bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM)); + bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM)); bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM)); end else begin - assign FinalWriteDataM = LittleEndianWriteDataM; + assign IEUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordM; end diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index c248a7505..40ea9a58a 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -114,6 +114,7 @@ logic [3:0] dummy; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; + "wally32d": if (`D_SUPPORTED) tests = wally32d; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; "imperas32c": if (`C_SUPPORTED) tests = imperas32c; diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 0d57dbdd7..c6ebf08c3 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -34,7 +34,7 @@ string tvpaths[] = '{ "../../addins/imperas-riscv-tests/work/", "../../tests/riscof/work/riscv-arch-test/", - "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", + "../../tests/riscof/work/wally-riscv-arch-test/", //"../../tests/wally-riscv-arch-test/work/", // "../../tests/imperas-riscv-tests/work/", "../../benchmarks/coremark/work/", "../../addins/embench-iot/" diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 830b9eef6..621a5b54b 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) XLEN ?= 64 -all: root build_arch # build_wally memfile +all: root build_arch build_wally memfile root: mkdir -p $(work_dir) From 62205ebb3beb95fd393aeebdb9a351b86bc7223f Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Sat, 9 Jul 2022 00:26:45 +0000 Subject: [PATCH 003/138] renamed FLoad2 to FStore2 --- pipelined/src/cache/cache.sv | 4 ++-- pipelined/src/cache/cacheway.sv | 4 ++-- pipelined/src/fpu/fpu.sv | 6 +++--- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 8 ++++---- pipelined/src/wally/wallypipelinedcore.sv | 6 +++--- pipelined/testbench/tests.vh | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index b80df13ae..ca6a5c9cf 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -43,7 +43,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, input logic [WORDLEN-1:0] FinalWriteData, - input logic FLoad2, + input logic FStore2, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu @@ -121,7 +121,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) - CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2, + CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FStore2, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Invalidate(InvalidateCacheM)); diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index ac1e26e8f..d1c85675d 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -38,7 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, input logic [LINELEN-1:0] CacheWriteData, - input logic FLoad2, + input logic FStore2, input logic SetValidWay, input logic ClearValidWay, input logic SetDirtyWay, @@ -79,7 +79,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); - assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0}; + assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0}; end else onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 7cf109013..255feb44a 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -42,7 +42,7 @@ module fpu ( input logic [1:0] STATUS_FS, // Is floating-point enabled? output logic FRegWriteM, // FP register write enable output logic FpLoadStoreM, // Fp load instruction? - output logic FLoad2, + output logic FStore2, output logic FStallD, // Stall the decode stage output logic FWriteIntE, // integer register write enables output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory @@ -298,8 +298,8 @@ module fpu ( assign FWriteDataE = FSrcYE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; - if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT; - else assign FLoad2 = FmtM; + if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; + else assign FStore2 = FmtM; if (`FPSIZES==1) assign FWriteDataE = FSrcYE; else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index c96396ab9..5c2f799d0 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -226,7 +226,7 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .CacheFetchLine(ICacheFetchLine), .FLoad2(), + .CacheFetchLine(ICacheFetchLine), .FStore2(), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index a63f813ee..50ecdb188 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -58,7 +58,7 @@ module lsu ( input logic sfencevmaM, // fpu input logic [`FLEN-1:0] FWriteDataM, - input logic FLoad2, + input logic FStore2, input logic FpLoadStoreM, // faults output logic LoadPageFaultM, StoreAmoPageFaultM, @@ -236,14 +236,14 @@ module lsu ( if(CACHE_ENABLED) begin : dcache if (`LLEN>`FLEN) - mux2 #(`LLEN) datamux({(`LLEN-`XLEN)'(0), IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + mux2 #(`LLEN) datamux({{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); else - assign FinalWriteDataM[`XLEN-1:0] = IEUWriteDataM; + assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, .FLoad2, + .ByteMask(ByteMaskM), .WordCount, .FStore2, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 7538a5419..372f4aba5 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -93,7 +93,7 @@ module wallypipelinedcore ( logic FStallD; logic FWriteIntE; logic [`XLEN-1:0] FWriteDataE; - logic FLoad2; + logic FStore2; logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FCvtIntResW; @@ -259,7 +259,7 @@ module wallypipelinedcore ( .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, .FpLoadStoreM, - .FWriteDataM, .FLoad2, + .FWriteDataM, .FStore2, //.DataMisalignedM(DataMisalignedM), .IEUAdrE, .IEUAdrM, .WriteDataE, .ReadDataW, .FlushDCacheM, @@ -400,7 +400,7 @@ module wallypipelinedcore ( .STATUS_FS, // is floating-point enabled? .FRegWriteM, // FP register write enable .FpLoadStoreM, - .FLoad2, + .FStore2, .FStallD, // Stall the decode stage .FWriteIntE, // integer register write enable .FWriteDataE, // Data to be written to memory diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index c6ebf08c3..e1b69cd2b 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -34,7 +34,7 @@ string tvpaths[] = '{ "../../addins/imperas-riscv-tests/work/", "../../tests/riscof/work/riscv-arch-test/", - "../../tests/riscof/work/wally-riscv-arch-test/", //"../../tests/wally-riscv-arch-test/work/", // + "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", // "../../tests/imperas-riscv-tests/work/", "../../benchmarks/coremark/work/", "../../addins/embench-iot/" From d1a7832dd938c4d818d0cdabc85b522245b4e30d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Jul 2022 12:47:34 +0000 Subject: [PATCH 004/138] added comment about RAMs in cacheway --- pipelined/src/cache/cacheway.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index a5f686531..cbaf915d0 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -105,6 +105,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // Data Array ///////////////////////////////////////////////////////////////////////////////////////////// + // *** instantiate one larger RAM, not one per RAM. Expand byte mask genvar words; for(words = 0; words < LINELEN/`XLEN; words++) begin: word sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr), From 03a20610aa5dcf49bbaba260e492e9ea0eb0e5dd Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Jul 2022 12:48:51 +0000 Subject: [PATCH 005/138] added comment about checking SRAM size --- pipelined/src/cache/sram1p1rw.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelined/src/cache/sram1p1rw.sv b/pipelined/src/cache/sram1p1rw.sv index 1b853702b..49bf5d852 100644 --- a/pipelined/src/cache/sram1p1rw.sv +++ b/pipelined/src/cache/sram1p1rw.sv @@ -53,6 +53,7 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( if (`USE_SRAM == 1) begin // 64 x 128-bit SRAM + // check if the size is ok, complain if not*** logic [WIDTH-1:0] BitWriteMask; for (index=0; index < WIDTH; index++) assign BitWriteMask[index] = ByteMask[index/8]; From cd2e32bcbaf63b59520203837e4711e793a4ed71 Mon Sep 17 00:00:00 2001 From: DTowersM Date: Mon, 11 Jul 2022 17:20:38 +0000 Subject: [PATCH 006/138] switched coremark to 10 iterations --- benchmarks/coremark/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index eebbfab5e..325509006 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -13,7 +13,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=rv$(XLEN)im -static -fal -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=2 + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 # flags that cause build errors mcmodel=medlow From fe7d03a3da3a3d0879b9c4cbfb1563c06501a3e9 Mon Sep 17 00:00:00 2001 From: DTowersM Date: Mon, 11 Jul 2022 21:13:09 +0000 Subject: [PATCH 007/138] added some preliminary support for coremark XLEN=32, made sure rv64 not impacted --- benchmarks/coremark/Makefile | 5 +++-- benchmarks/coremark/riscv64-baremetal/core_portme.h | 10 +++++++++- benchmarks/coremark/riscv64-baremetal/core_portme.mak | 4 ++-- pipelined/testbench/testbench.sv | 1 + pipelined/testbench/tests.vh | 2 +- 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 325509006..2db418aa3 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -8,12 +8,13 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=rv$(XLEN)im -static -falign-functions=16 \ +ARCH := rv$(XLEN)im +PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \ -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) # flags that cause build errors mcmodel=medlow diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index 1d2baeb95..33768b0f1 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -66,6 +66,9 @@ typedef size_t CORE_TICKS; #elif HAS_TIME_H #include typedef clock_t CORE_TICKS; +// #elif (XLEN==32) +// #include +// typedef ee_u32 CORE_TICKS; #else /* Configuration: size_t and clock_t Note these need to match the size of the clock output and the xLen the processor supports @@ -105,11 +108,16 @@ typedef signed int ee_s32; typedef double ee_f32; typedef unsigned char ee_u8; typedef unsigned int ee_u32; -typedef unsigned long long ee_ptr_int; +#if (XLEN==64) + typedef unsigned long long ee_ptr_int; +#else + typedef ee_u32 ee_ptr_int; +#endif typedef size_t ee_size_t; /* align an offset to point to a 32b value */ #define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) + /* Configuration: SEED_METHOD Defines method to get seed values that cannot be computed at compile time. diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.mak b/benchmarks/coremark/riscv64-baremetal/core_portme.mak index 4bae943dc..27e31b859 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.mak +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.mak @@ -33,13 +33,13 @@ CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc # Flag: CFLAGS # Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" #PORT_CFLAGS = -O2 -static -std=gnu99 -PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld +PORT_CFLAGS = -mcmodel=medany -fno-tree-loop-distribute-patterns -fno-common -lm -lgcc -T $(PORT_DIR)/link.ld FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" #Flag: LFLAGS_END # Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). # Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END += +LFLAGS_END += -static-libgcc -lgcc # Flag: PORT_SRCS # Port specific source files can be added here PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 568f2722d..8f6f99a59 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -123,6 +123,7 @@ logic [3:0] dummy; "wally32priv": tests = wally32priv; "wally32periph": tests = wally32periph; "embench": tests = embench; + "coremark": tests = coremark; endcase end if (tests.size() == 0) begin diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index e1c5fb249..8971e544a 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -36,7 +36,7 @@ string tvpaths[] = '{ "../../tests/riscof/work/riscv-arch-test/", "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", "../../tests/imperas-riscv-tests/work/", - "../../benchmarks/coremark/work/", //"../../benchmarks/coremark/work/", + "../../benchmarks/coremark/work/", "../../addins/embench-iot/" }; From bea4ec078d5ac58bcb9768214c3bc9403579d1f3 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 11 Jul 2022 18:30:21 -0700 Subject: [PATCH 008/138] variable interations implemented in radix-4 divider --- pipelined/config/rv64fp/wally-config.vh | 4 +- pipelined/config/shared/wally-shared.vh | 12 +- pipelined/regression/wave-fpu.do | 10 +- pipelined/src/fpu/divshiftcalc.sv | 9 +- pipelined/src/fpu/divsqrt.sv | 10 +- pipelined/src/fpu/flags.sv | 38 ++-- pipelined/src/fpu/fmashiftcalc.sv | 24 +- pipelined/src/fpu/fpu.sv | 10 +- pipelined/src/fpu/lzacorrection.sv | 4 +- pipelined/src/fpu/postprocess.sv | 26 +-- pipelined/src/fpu/resultselect.sv | 290 ------------------------ pipelined/src/fpu/resultsign.sv | 4 +- pipelined/src/fpu/round.sv | 6 +- pipelined/src/fpu/srt-radix4.sv | 140 ++++++++---- pipelined/src/fpu/srtfsm.sv | 10 +- pipelined/src/fpu/srtpreproc.sv | 16 +- pipelined/testbench/testbench-fp.sv | 14 +- 17 files changed, 203 insertions(+), 424 deletions(-) delete mode 100644 pipelined/src/fpu/resultselect.sv diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index b92bc07aa..cc8d1b2b8 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,14 +32,14 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 // MISA RISC-V configuration per specification // ZYXWVUTSRQPONMLKJIHGFEDCBA -`define MISA 32'b0000000000101000001000100100101 +`define MISA 32'b0000000000101000001000100101101 `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 671f73430..c064783c2 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -95,12 +95,22 @@ // largest length in IEU/FPU `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) `define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) +// division constants +`define RADIX 4 +`define DIVCOPIES 4 +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) +`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) +`define LOGR ((`RADIX==2) ? 1 : 2) +`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES)) +`define DURLEN ($clog2($rtoi(`FPDUR)+1)) +`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES) + + `define USE_SRAM 0 // Disable spurious Verilator warnings diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 9e7ba49b1..58f782bd6 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -11,7 +11,7 @@ add wave -noupdate /testbenchfp/DivStart add wave -noupdate /testbenchfp/DivBusy add wave -noupdate /testbenchfp/srtfsm/state add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* -add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* @@ -21,8 +21,12 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* +add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/* +add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/* +add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/* add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index 935ed3c18..a4f3feff6 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,9 +1,9 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`DIVLEN+2:0] Quot, + input logic [`QLEN-1:0] Quot, input logic [`FMTBITS-1:0] Fmt, - input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, + input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`NE+1:0] DivCalcExp, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, @@ -32,9 +32,10 @@ module divshiftcalc( // inital Left shift amount = NF assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0}; + // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; // *** may be able to reduce shifter size - assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}}; + assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 086b97d8c..c4f09aea5 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -47,8 +47,8 @@ module divsqrt( output logic DivBusy, output logic DivDone, output logic [`NE+1:0] DivCalcExpM, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, - output logic [`DIVLEN+2:0] QuotM + output logic [`DURLEN-1:0] EarlyTermShiftM, + output logic [`QLEN-1:0] QuotM // output logic [`XLEN-1:0] RemM, ); @@ -57,12 +57,12 @@ module divsqrt( logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; - logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + logic [`DURLEN-1:0] Dur; srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 98250a45b..4e16bc965 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -34,20 +34,20 @@ module flags( input logic XInf, YInf, ZInf, // inputs are infinity input logic Plus1, input logic InfIn, // is a Inf input being used + input logic NaNIn, // is a NaN input being used + input logic [`FMTBITS-1:0] OutFmt, // output format input logic XZero, YZero, // inputs are zero input logic XNaN, YNaN, // inputs are NaN - input logic NaNIn, // is a NaN input being used input logic Sqrt, // Sqrt? input logic ToInt, // convert to integer input logic IntToFp, // convert integer to floating point input logic Int64, // convert to 64 bit integer input logic Signed, // convert to a signed integer - input logic [`FMTBITS-1:0] OutFmt, // output format input logic [`NE:0] CvtCe, // the calculated expoent - Cvt input logic CvtOp, // conversion opperation? input logic DivOp, // conversion opperation? input logic FmaOp, // Fma opperation? - input logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow + input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow input logic [`NE+1:0] Nexp, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs @@ -73,30 +73,30 @@ module flags( if (`FPSIZES == 1) begin - assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 2) begin - assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); + assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); - `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]); + `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); + `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]); default: ResExpGteMax = 1'bx; endcase - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 4) begin always_comb case (OutFmt) - `Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE]; - `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]); - `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]); - `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]); + `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE]; + `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]); + `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]); + `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]); endcase // a left shift of intlen+1 is still in range but any more than that is an overflow // inital: | 64 0's | XLEN | @@ -110,14 +110,14 @@ module flags( // - any of the bits after the most significan 1 is one // - the most signifcant in 65 or 33 is still a one in the number and // one of the later bits is one - assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end // if the result is greater than or equal to the max exponent(not taking into account sign) // | and the exponent isn't negitive // | | if the input isnt infinity or NaN // | | | - assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero); + assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero); // detecting tininess after rounding // the exponent is negitive @@ -127,7 +127,7 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed @@ -153,7 +153,7 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); + assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); // | // or when the positive res rounds up out of range assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 5f55e17b6..d4898e806 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -37,7 +37,7 @@ module fmashiftcalc( input logic FmaKillProd, // is the product set to zero input logic ZDenorm, output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - output logic FmaSmZero, // is the result denormalized - calculated before LZA corection + output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero @@ -50,7 +50,7 @@ module fmashiftcalc( /////////////////////////////////////////////////////////////////////////////// //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero - assign FmaSmZero = ~(|FmaSm); + assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); @@ -90,7 +90,7 @@ module fmashiftcalc( logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; + assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; @@ -98,7 +98,7 @@ module fmashiftcalc( assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; - assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero; + assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; @@ -110,9 +110,9 @@ module fmashiftcalc( assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; always_comb begin case (Fmt) - `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; - `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; - `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; + `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; default: FmaPreResultDenorm = 1'bx; endcase end @@ -129,10 +129,10 @@ module fmashiftcalc( assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; always_comb begin case (Fmt) - 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; - 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; - 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; - 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero; + 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking end @@ -144,7 +144,7 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero; + // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 8336c39cf..e1c9e5fac 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -125,12 +125,12 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`DIVLEN+2:0] QuotE, QuotM; + logic [`QLEN-1:0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic DivNegStickyE, DivNegStickyM; logic DivStickyE, DivStickyM; logic DivDoneM; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M; + logic [`DURLEN-1:0] EarlyTermShiftM; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -289,7 +289,7 @@ module fpu ( divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal - .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM)); + .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); @@ -381,12 +381,12 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M), + postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), - .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); + .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index 03b36f4fe..17db0c0b9 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -38,7 +38,7 @@ module lzacorrection( input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaKillProd, // is the product set to zero - input logic FmaSmZero, + input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction output logic [`NE+1:0] DivCorrExp, output logic [`NE+1:0] FmaSe // exponent of the normalized sum @@ -59,7 +59,7 @@ module lzacorrection( assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}}; + assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index e165e7e1e..18452abd0 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -54,12 +54,12 @@ module postprocess( input logic FmaInvA, // do you invert Z input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals - input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, + input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivSticky, input logic DivNegSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, - input logic [`DIVLEN+2:0] Quot, + input logic [`QLEN-1:0] Quot, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -69,7 +69,7 @@ module postprocess( input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) input logic IntZero, // is the input zero // final results - output logic [`FLEN-1:0] W, // FMA final result + output logic [`FLEN-1:0] PostProcRes, // FMA final result output logic [4:0] PostProcFlg, output logic [`XLEN-1:0] FCvtIntRes // the int conversion result ); @@ -81,7 +81,7 @@ module postprocess( logic Nsgn; logic [`NE+1:0] Nexp; logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction - logic [`NE+1:0] FullResExp; // Re with bits to determine sign and overflow + logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) logic R; // bits needed to determine rounding @@ -95,7 +95,7 @@ module postprocess( logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaSe; // exponent of the normalized sum - logic FmaSmZero; // is the sum zero + logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection @@ -153,8 +153,8 @@ module postprocess( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, - .ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -185,7 +185,7 @@ module postprocess( lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, - .DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac); + .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -204,14 +204,14 @@ module postprocess( round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, .DivSticky, .DivNegSticky, .DivDone, - .DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, - .FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws); + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws); /////////////////////////////////////////////////////////////////////////////// // Flags @@ -220,7 +220,7 @@ module postprocess( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1, + .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// @@ -228,10 +228,10 @@ module postprocess( /////////////////////////////////////////////////////////////////////////////// negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); - resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, - .DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes); + .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv deleted file mode 100644 index 4389056f5..000000000 --- a/pipelined/src/fpu/resultselect.sv +++ /dev/null @@ -1,290 +0,0 @@ -/////////////////////////////////////////// -// -// Written: me@KatherineParry.com -// Modified: 7/5/2022 -// -// Purpose: special case selection -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module resultselect( - input logic Xs, // input signs - input logic [`NF:0] Xm, Ym, Zm, // input mantissas - input logic XNaN, YNaN, ZNaN, // inputs are NaN - input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic InfIn, - input logic XInf, YInf, - input logic XZero, - input logic IntZero, - input logic NaNIn, - input logic IntToFp, - input logic Int64, - input logic Signed, - input logic CvtOp, - input logic DivOp, - input logic FmaOp, - input logic Plus1, - input logic DivByZero, - input logic [`NE:0] CvtCe, // the calculated expoent - input logic Ws, // the res's sign - input logic IntInvalid, Invalid, Overflow, // flags - input logic CvtResUf, - input logic [`NE-1:0] Re, // Res exponent - input logic [`NE+1:0] FullResExp, // Res exponent - input logic [`NF-1:0] Rf, // Res fraction - input logic [`XLEN+1:0] CvtNegRes, // the negation of the result - output logic [`FLEN-1:0] W, // final res - output logic [`XLEN-1:0] FCvtIntRes // final res -); - logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results - logic OfResMax; - logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output - logic KillRes; - logic SelOfRes; - - - // does the overflow result output the maximum normalized floating point number - // output infinity if the input is infinity - assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws)); - - if (`FPSIZES == 1) begin - - //NaN res selection depending on standard - if(`IEEE754) begin - assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - assign OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = {Ws, Re, Rf}; - - end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? - if(`IEEE754) begin - assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - - assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : - OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; - end - `FMT1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - end - `FMT2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; - YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; - ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; - end - default: begin - if(`IEEE754) begin - XNaNRes = (`FLEN)'(0); - YNaNRes = (`FLEN)'(0); - ZNaNRes = (`FLEN)'(0); - InvalidRes = (`FLEN)'(0); - end else begin - InvalidRes = (`FLEN)'(0); - end - OfRes = (`FLEN)'(0); - UfRes = (`FLEN)'(0); - NormRes = (`FLEN)'(0); - end - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - 2'h3: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; - end - 2'h1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; - YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; - ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; - end - 2'h0: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; - YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; - ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; - end - 2'h2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; - YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; - ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)}; - // zero is exact fi dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; - end - endcase - - end - - - - - - // determine if you shoould kill the res - Cvt - // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 - // - dont set to zero if fp input is zero but not using the fp input - // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); - assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); - // output infinity with result sign if divide by zero - if(`IEEE754) begin - assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes : - YNaN&~CvtOp ? YNaNRes : - ZNaN&FmaOp ? ZNaNRes : - Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end else begin - assign W = NaNIn|Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end - - /////////////////////////////////////////////////////////////////////////////////////// - // - // ||||||||||| ||| ||| ||||||||||||| - // ||| |||||| ||| ||| - // ||| ||| ||| ||| ||| - // ||| ||| |||||| ||| - // ||||||||||| ||| ||| ||| - // - /////////////////////////////////////////////////////////////////////////////////////// - - // *** probably can optimize the negation - // select the overflow integer res - // - negitive infinity and out of range negitive input - // | int | long | - // signed | -2^31 | -2^63 | - // unsigned | 0 | 0 | - // - // - positive infinity and out of range positive input and NaNs - // | int | long | - // signed | 2^31-1 | 2^63-1 | - // unsigned | 2^32-1 | 2^64-1 | - // - // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive - Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive - {`XLEN{1'b1}};// unsigned positive - - - // select the integer output - // - if the input is invalid (out of bounds NaN or Inf) then output overflow res - // - if the input underflows - // - if rounding and signed opperation and negitive input, output -1 - // - otherwise output a rounded 0 - // - otherwise output the normal res (trmined and sign extended if nessisary) - assign FCvtIntRes = IntInvalid ? OfIntRes : - CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? - Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; -endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index 05c3b4618..e6de0c181 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -35,7 +35,7 @@ module resultsign( input logic InfIn, input logic FmaOp, input logic [`NE+1:0] FmaSe, - input logic FmaSmZero, + input logic FmaSZero, input logic Mult, input logic R, input logic S, @@ -61,6 +61,6 @@ module resultsign( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign InfSgn = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn; + assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 4c185ff32..c73edc088 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -57,7 +57,7 @@ module round( input logic DivSticky, // sticky bit input logic DivNegSticky, output logic UfPlus1, // do you add or subtract on from the result - output logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow + output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction output logic [`NE-1:0] Re, // Result exponent output logic S, // sticky bit @@ -344,8 +344,8 @@ module round( // round the result // - if the fraction overflows one should be added to the exponent - assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd; - assign Re = FullResExp[`NE-1:0]; + assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd; + assign Re = FullRe[`NE-1:0]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 741d4e839..1c7b96489 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" -module srtradix4 ( +module srtradix4( input logic clk, input logic DivStart, input logic DivBusy, @@ -40,20 +40,29 @@ module srtradix4 ( input logic [`DIVLEN-1:0] X, input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`DIVLEN+2:0] Quot, + output logic [`QLEN-1:0] Quot, output logic [`DIVLEN+3:0] WSN, WCN, - output logic [`DIVLEN+3:0] WS, WC, + output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem ); - logic [3:0] q; - logic [`DIVLEN+3:0] WSA; - logic [`DIVLEN+3:0] WCA; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; + + /* verilator lint_off UNOPTFLAT */ + logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`NE+1:0] DivCalcExp; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; + logic [`QLEN-1:0] QMux, QMMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -63,47 +72,43 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); - flop #(`DIVLEN+4) wsflop(clk, WSN, WS); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flop #(`DIVLEN+4) wcflop(clk, WCN, WC); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); + flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); + mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); + flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); - // Quotient Selection logic - // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - // *** change this for radix 4 - generate w/ stine code - // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - qsel4 qsel4(.D, .WS, .WC, .q); - // Divisor Selection logic - // *** radix 4 change to choose -2 to 2 + // Divisor Selections // - choose the negitive version of what's being selected assign DBar = ~D; assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; assign D2 = {D[`DIVLEN+2:0], 1'b0}; - always_comb - case (q) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = {(`DIVLEN+4){1'b0}}; - 4'b0010: Dsel = D; - 4'b0001: Dsel = D2; - default: Dsel = {`DIVLEN+4{1'bx}}; - endcase + genvar i; + generate + for(i=0; i<`DIVCOPIES; i++) begin + divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + if(i<3) begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + assign Q[i+1] = QNext[i]; + assign QM[i+1] = QMNext[i]; + end + end + endgenerate - // Partial Product Generation - // WSA, WCA = WS + WC - qD - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); - - //*** change for radix 4 - otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot); + // if starting a new divison set Q to 0 and QM to -1 + mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux); + mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); + flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage + flop #(`QLEN) QMreg(clk, QMMux, QM[0]); + + assign Quot = Q[0]; + assign FirstWS = WS[0]; + assign FirstWC = WC[0]; expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); @@ -113,7 +118,50 @@ endmodule // Submodules // //////////////// + /* verilator lint_off UNOPTFLAT */ +module divinteration ( + input logic clk, + input logic DivStart, + input logic DivBusy, + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] DBar, D2, DBar2, + input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] WS, WC, + output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] WSA, WCA +); + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] Dsel; + logic [3:0] q; + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + // q encoding: + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 + qsel4 qsel4(.D, .WS, .WC, .q); + + always_comb + case (q) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = {`DIVLEN+4{1'b0}}; + 4'b0010: Dsel = D; + 4'b0001: Dsel = D2; + default: Dsel = {`DIVLEN+4{1'bx}}; + endcase + + // Partial Product Generation + // WSA, WCA = WS + WC - qD + csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + + otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext); + +endmodule module qsel4 ( input logic [`DIVLEN+3:0] D, @@ -195,7 +243,8 @@ module otfc4 ( input logic DivStart, input logic DivBusy, input logic [3:0] q, - output logic [`DIVLEN+2:0] Quot + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext ); // The on-the-fly converter transfers the quotient @@ -207,16 +256,11 @@ module otfc4 ( // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux; + // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. - logic [`DIVLEN:0] QR, QMR; - // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); - mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); - flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage - flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); + logic [`QLEN-3:0] QR, QMR; // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -227,8 +271,8 @@ module otfc4 ( // *** how does the 0 concatination numbers work? always_comb begin - QR = Quot[`DIVLEN:0]; - QMR = QM[`DIVLEN:0]; // Shift Q and QM + QR = Q[`QLEN-3:0]; + QMR = QM[`QLEN-3:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 008b234d2..fc73cf710 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -40,8 +40,8 @@ module srtfsm( input logic DivStart, input logic StallE, input logic StallM, - input logic [$clog2(`DIVLEN/2+3)-1:0] Dur, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, + input logic [`DURLEN-1:0] Dur, + output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, output logic DivDone, output logic DivNegStickyE, @@ -51,7 +51,7 @@ module srtfsm( typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; - logic [$clog2(`DIVLEN/2+3)-1:0] step; + logic [`DURLEN-1:0] step; logic WZero; //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DIVLEN+3:0] W; @@ -63,7 +63,7 @@ module srtfsm( assign DivDone = (state == DONE); assign W = WC+WS; assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? - assign EarlyTermShiftDiv2E = step; + assign EarlyTermShiftE = step; always_ff @(posedge clk) begin if (reset) begin @@ -73,7 +73,7 @@ module srtfsm( if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin - if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin + if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin state <= #1 DONE; end step <= step - 1; diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index d17d2abd3..fa76c0511 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -35,7 +35,7 @@ module srtpreproc ( output logic [`DIVLEN-1:0] X, output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [$clog2(`DIVLEN/2+3)-1:0] Dur + output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; @@ -63,10 +63,20 @@ module srtpreproc ( assign X = PreprocX; assign Dpreproc = PreprocY; - - assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2); + + assign Dur = (`DURLEN)'($rtoi(`FPDUR)); // assign intExp = zeroCntB - zeroCntA + 1; // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + // radix 2 radix 4 + // 1 copies DIVLEN+2 DIVLEN+2/2 + // 2 copies DIVLEN+2/2 DIVLEN+2/2*2 + // 4 copies DIVLEN+2/4 DIVLEN+2/2*4 + // 8 copies DIVLEN+2/8 DIVLEN+2/2*8 + + // DIVRESLEN = DIVLEN or DIVLEN+2 + // r = 1 or 2 + // DIVRESLEN/(r*`DIVCOPIES) + endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index fa46a060f..ba14499e0 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -80,9 +80,9 @@ module testbenchfp; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by - logic [`DIVLEN+2:0] Quot; + logic [`QLEN-1:0] Quot; logic CvtResDenormUfE; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2; + logic [`DURLEN-1:0] EarlyTermShift; logic DivStart, DivBusy; logic reset = 1'b0; logic [`DIVLEN-1:0] DivX; @@ -90,7 +90,7 @@ module testbenchfp; logic [`DIVLEN+3:0] WSN, WS; logic [`DIVLEN+3:0] WCN, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + logic [`DURLEN-1:0] Dur; // in-between FMA signals logic Mult; @@ -686,8 +686,8 @@ module testbenchfp; .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone, - .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), - .PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes)); + .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero, @@ -697,8 +697,8 @@ module testbenchfp; .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; From 1267d33d3cbe282f1feb093662f7c3b9db909150 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 11 Jul 2022 18:31:51 -0700 Subject: [PATCH 009/138] forgot a file --- pipelined/src/fpu/specialcase.sv | 290 +++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 pipelined/src/fpu/specialcase.sv diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv new file mode 100644 index 000000000..3c28eae2e --- /dev/null +++ b/pipelined/src/fpu/specialcase.sv @@ -0,0 +1,290 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: special case selection +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module specialcase( + input logic Xs, // input signs + input logic [`NF:0] Xm, Ym, Zm, // input mantissas + input logic XNaN, YNaN, ZNaN, // inputs are NaN + input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic InfIn, + input logic NaNIn, + input logic XInf, YInf, + input logic XZero, + input logic IntZero, + input logic IntToFp, + input logic Int64, + input logic Signed, + input logic CvtOp, + input logic DivOp, + input logic FmaOp, + input logic Plus1, + input logic DivByZero, + input logic [`NE:0] CvtCe, // the calculated expoent + input logic Ws, // the res's sign + input logic IntInvalid, Invalid, Overflow, // flags + input logic CvtResUf, + input logic [`NE-1:0] Re, // Res exponent + input logic [`NE+1:0] FullRe, // Res exponent + input logic [`NF-1:0] Rf, // Res fraction + input logic [`XLEN+1:0] CvtNegRes, // the negation of the result + output logic [`FLEN-1:0] PostProcRes, // final res + output logic [`XLEN-1:0] FCvtIntRes // final res +); + logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results + logic OfResMax; + logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output + logic KillRes; + logic SelOfRes; + + + // does the overflow result output the maximum normalized floating point number + // output infinity if the input is infinity + assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws)); + + if (`FPSIZES == 1) begin + + //NaN res selection depending on standard + if(`IEEE754) begin + assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + assign OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = {Ws, Re, Rf}; + + end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? + if(`IEEE754) begin + assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end + + assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : + OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + + end else if (`FPSIZES == 3) begin + always_comb + case (OutFmt) + `FMT: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Ws, Re, Rf}; + end + `FMT1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end + OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + end + `FMT2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; + YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; + ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)}; + UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; + end + default: begin + if(`IEEE754) begin + XNaNRes = (`FLEN)'(0); + YNaNRes = (`FLEN)'(0); + ZNaNRes = (`FLEN)'(0); + InvalidRes = (`FLEN)'(0); + end else begin + InvalidRes = (`FLEN)'(0); + end + OfRes = (`FLEN)'(0); + UfRes = (`FLEN)'(0); + NormRes = (`FLEN)'(0); + end + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (OutFmt) + 2'h3: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Ws, Re, Rf}; + end + 2'h1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; + YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; + ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end + OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)}; + UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; + end + 2'h0: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; + YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; + ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)}; + UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; + end + 2'h2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; + YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; + ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)}; + // zero is exact fi dividing by infinity so don't add 1 + UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; + end + endcase + + end + + + + + + // determine if you shoould kill the res - Cvt + // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 + // - dont set to zero if fp input is zero but not using the fp input + // - dont set to zero if int input is zero but not using the int input + assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); + assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); + // output infinity with result sign if divide by zero + if(`IEEE754) begin + assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes : + YNaN&~CvtOp ? YNaNRes : + ZNaN&FmaOp ? ZNaNRes : + Invalid ? InvalidRes : + SelOfRes ? OfRes : + KillRes ? UfRes : + NormRes; + end else begin + assign PostProcRes = NaNIn|Invalid ? InvalidRes : + SelOfRes ? OfRes : + KillRes ? UfRes : + NormRes; + end + + /////////////////////////////////////////////////////////////////////////////////////// + // + // ||||||||||| ||| ||| ||||||||||||| + // ||| |||||| ||| ||| + // ||| ||| ||| ||| ||| + // ||| ||| |||||| ||| + // ||||||||||| ||| ||| ||| + // + /////////////////////////////////////////////////////////////////////////////////////// + + // *** probably can optimize the negation + // select the overflow integer res + // - negitive infinity and out of range negitive input + // | int | long | + // signed | -2^31 | -2^63 | + // unsigned | 0 | 0 | + // + // - positive infinity and out of range positive input and NaNs + // | int | long | + // signed | 2^31-1 | 2^63-1 | + // unsigned | 2^32-1 | 2^64-1 | + // + // other: 32 bit unsinged res should be sign extended as if it were a signed number + assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive + Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive + Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive + {`XLEN{1'b1}};// unsigned positive + + + // select the integer output + // - if the input is invalid (out of bounds NaN or Inf) then output overflow res + // - if the input underflows + // - if rounding and signed opperation and negitive input, output -1 + // - otherwise output a rounded 0 + // - otherwise output the normal res (trmined and sign extended if nessisary) + assign FCvtIntRes = IntInvalid ? OfIntRes : + CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? + Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; +endmodule \ No newline at end of file From c60991f2bf6e3e0dc373aea7d0a0a427d3c2594a Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 02:21:38 +0000 Subject: [PATCH 010/138] On the fly conversion for square root --- pipelined/srt/srt.sv | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 6e8cd5601..7886af477 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -229,7 +229,7 @@ module otfc2 #(parameter N=64) ( logic [N+1:0] QR, QMR; flopr #(N+3) Qreg(clk, Start, QNext, Q); - mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); + mux2 #(`DIVLEN+3) Qmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); always_comb begin @@ -254,12 +254,36 @@ endmodule // Square Root OTFC, Radix 2 // /////////////////////////////// module softc2( - input logic clk, - input logic Start, - input logic sp, sn, - output logic S, + input logic clk, + input logic Start, + input logic sp, sn, + input logic [N+3:0] C, + output logic [N-1:0] Sq, ); + + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + logic [N+2:0] S, SM, SNext, SMNext, SMMux; + + flopr #(N+3) Sreg(clk, Start, SNext, S); + mux2 #(`DIVLEN+3) Smux(SMNext, {`DIVLEN+3{1'b1}}, Start, SMMux); + flop #(`DIVLEN+3) SMreg(clk, SMMux, SM); + + always_comb begin + if (sp) begin + SNext = S | ((C << 2) & ~(C << 1)); + SMNext = S; + end else if (sn) begin + SNext = SM | ((C << 2) & ~(C << 1)); + SMNext = SM; + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | ((C << 2) & ~(C << 1)); + end + end + assign Sq = S[N+2] ? S[N+1:2] : S[N:1]; + endmodule ///////////// // counter // From 8edf44063a1d81fac5e83d5b1f85c586a52ebce1 Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 22:18:56 +0000 Subject: [PATCH 011/138] C register and other various fixes --- pipelined/srt/srt.sv | 54 +++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 7886af477..7e6d22a41 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -54,13 +54,13 @@ module srt ( output logic [3:0] Flags ); - logic qp, qz, qm; // quotient is +1, 0, or -1 - logic [`NE-1:0] calcExp; - logic calcSign; - logic [`DIVLEN+3:0] X, Dpreproc; - logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; + logic qp, qz, qm; // quotient is +1, 0, or -1 + logic [`NE-1:0] calcExp; + logic calcSign; + logic [`DIVLEN+3:0] X, Dpreproc, C; + logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; - logic intSign; + logic intSign; srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); @@ -91,7 +91,11 @@ module srt ( // Partial Product Generation csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); - otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // If only implementing division, use divide otfc + // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // otherwise use sotfc + creg sotfcC(clk, Start, C); + sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -138,9 +142,9 @@ module srtpreproc ( assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}}; assign DivX = Int ? PreprocA : PreprocX; - assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac}; + assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; - assign X = Sqrt ? SqrtX : {4'b0001, DivX}; + assign X = Sqrt ? {SqrtX, {(`EXTRAINTBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; assign intExp = zeroCntB - zeroCntA + 1; assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); @@ -253,22 +257,22 @@ endmodule /////////////////////////////// // Square Root OTFC, Radix 2 // /////////////////////////////// -module softc2( +module sotfc2( input logic clk, input logic Start, input logic sp, sn, - input logic [N+3:0] C, - output logic [N-1:0] Sq, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN-1:0] Sq, ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. - logic [N+2:0] S, SM, SNext, SMNext, SMMux; + logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; - flopr #(N+3) Sreg(clk, Start, SNext, S); - mux2 #(`DIVLEN+3) Smux(SMNext, {`DIVLEN+3{1'b1}}, Start, SMMux); - flop #(`DIVLEN+3) SMreg(clk, SMMux, SM); + flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); + mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux); + flop #(`DIVLEN+4) SMreg(clk, SMux, M); always_comb begin if (sp) begin @@ -282,9 +286,23 @@ module softc2( SMNext = SM | ((C << 2) & ~(C << 1)); end end - assign Sq = S[N+2] ? S[N+1:2] : S[N:1]; + assign Sq = S[`DIVLEN-1:0]; endmodule + +////////////////////////// +// C Register for SOTFC // +////////////////////////// +module creg(input logic clk, + input logic Start, + output logic [`DIVLEN+3:0] C +); + logic [`DIVLEN+3:0] CMux; + + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, (`DIVLEN-2){1'b0}}, Start, CMux); + flop #(`DIVLEN+4) cflop(clk, CMux, C); +endmodule + ///////////// // counter // ///////////// @@ -293,7 +311,7 @@ module counter(input logic clk, input logic [$clog2(`XLEN+1)-1:0] dur, output logic done); - logic [$clog2(`XLEN+1)-1:0] count; + logic [$clog2(`XLEN+1)-1:0] count; // This block of control logic sequences the divider // through its iterations. You may modify it if you From 12a54161c060dab2a0edfd4a184738e9576ce579 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 12 Jul 2022 22:42:19 +0000 Subject: [PATCH 012/138] found the bug in the store modification --- pipelined/src/lsu/lsu.sv | 4 ++-- tests/riscof/spike/spike_rv32imc_isa.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 50ecdb188..e9f41e653 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -235,8 +235,8 @@ module lsu ( .s(SelUncachedAdr), .y(LSUBusHWDATA)); if(CACHE_ENABLED) begin : dcache - if (`LLEN>`FLEN) - mux2 #(`LLEN) datamux({{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + if (`LLEN>`XLEN) + mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); else assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), diff --git a/tests/riscof/spike/spike_rv32imc_isa.yaml b/tests/riscof/spike/spike_rv32imc_isa.yaml index 5a76fd978..04a5da180 100644 --- a/tests/riscof/spike/spike_rv32imc_isa.yaml +++ b/tests/riscof/spike/spike_rv32imc_isa.yaml @@ -1,11 +1,11 @@ hart_ids: [0] hart0: - ISA: RV32IMAFCZicsr_Zifencei + ISA: RV32IMAFDCZicsr_Zifencei physical_addr_sz: 32 User_Spec_Version: '2.3' supported_xlen: [32] misa: - reset-val: 0x40001125 + reset-val: 0x4000112D rv32: accessible: true mxl: @@ -23,6 +23,6 @@ hart0: warl: dependency_fields: [] legal: - - extensions[25:0] bitmask [0x0001125, 0x0000000] + - extensions[25:0] bitmask [0x000112D, 0x0000000] wr_illegal: - Unchanged \ No newline at end of file From b505ef135d2993a058f4d2c8fce427e6e723d4f4 Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 22:45:54 +0000 Subject: [PATCH 013/138] Square root implemented --- pipelined/srt/srt.sv | 8 ++++---- pipelined/srt/testbench.sv | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 7e6d22a41..1037c9e24 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -92,10 +92,10 @@ module srt ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); // If only implementing division, use divide otfc - // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); // otherwise use sotfc - creg sotfcC(clk, Start, C); - sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); + // creg sotfcC(clk, Start, C); + // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -272,7 +272,7 @@ module sotfc2( flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux); - flop #(`DIVLEN+4) SMreg(clk, SMux, M); + flop #(`DIVLEN+4) SMreg(clk, SMux, S); always_comb begin if (sp) begin diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 83f337073..1f6f15616 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -82,7 +82,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b1; + assign Sqrt = 1'b0; // Divider srt srt(.clk, .Start(req), @@ -111,7 +111,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("sqrttestvectors", Tests); + $readmemh ("testvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From 8d5081e8e920d6ec7d2cb01020d821beb6481ef4 Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 23:04:33 +0000 Subject: [PATCH 014/138] little fix --- pipelined/srt/srt.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 1037c9e24..b3de448f7 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -144,7 +144,7 @@ module srtpreproc ( assign DivX = Int ? PreprocA : PreprocX; assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; - assign X = Sqrt ? {SqrtX, {(`EXTRAINTBITS-1){1'b0}}} : {4'b0001, DivX}; + assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; assign intExp = zeroCntB - zeroCntA + 1; assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); From 3c1bea11049ab3b539c9bc83a3189175119ba15d Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 12 Jul 2022 18:32:17 -0700 Subject: [PATCH 015/138] removed warnings and took a mux out of the critical path --- pipelined/config/rv64fp/wally-config.vh | 2 +- pipelined/config/shared/wally-shared.vh | 13 +++++++------ pipelined/regression/wave-fpu.do | 3 --- pipelined/src/fpu/divsqrt.sv | 6 +++--- pipelined/src/fpu/fmashiftcalc.sv | 3 ++- pipelined/src/fpu/postprocess.sv | 2 +- pipelined/src/fpu/srt-radix4.sv | 26 +++++++++++++------------ pipelined/src/fpu/srtfsm.sv | 6 +++--- pipelined/src/fpu/srtpreproc.sv | 3 +-- pipelined/src/generic/lzc.sv | 2 +- pipelined/testbench/testbench-fp.sv | 8 ++++---- synthDC/scripts/synth.tcl | 2 +- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index cc8d1b2b8..8f13b2e36 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 32 +`define XLEN 64 // IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index c064783c2..54fa7a9bd 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,14 +101,15 @@ `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) // division constants -`define RADIX 4 -`define DIVCOPIES 4 +`define RADIX 32'h4 +`define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) -`define LOGR ((`RADIX==2) ? 1 : 2) -`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES)) -`define DURLEN ($clog2($rtoi(`FPDUR)+1)) -`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES) +`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) +// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) +`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) +`define DURLEN ($clog2(`FPDUR+1)) +`define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define USE_SRAM 0 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 58f782bd6..9a3d7e061 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -24,9 +24,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* -add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/* -add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/* -add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/* add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index c4f09aea5..8420baa13 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -52,7 +52,7 @@ module divsqrt( // output logic [`XLEN-1:0] RemM, ); - logic [`DIVLEN+3:0] WSN, WCN; + logic [`DIVLEN+3:0] NextWSN, NextWCN; logic [`DIVLEN+3:0] WS, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; @@ -61,8 +61,8 @@ module divsqrt( srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index d4898e806..3c286b50f 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -53,7 +53,8 @@ module fmashiftcalc( assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); + // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 + assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 18452abd0..30945532a 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -29,7 +29,7 @@ `include "wally-config.vh" -module postprocess( +module postprocess ( // general signals input logic Xs, Ys, // input signs input logic [`NE-1:0] Ze, // input exponents diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 1c7b96489..5a7e96e2a 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -41,7 +41,7 @@ module srtradix4( input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [`QLEN-1:0] Quot, - output logic [`DIVLEN+3:0] WSN, WCN, + output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem @@ -58,11 +58,12 @@ module srtradix4( logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`NE+1:0] DivCalcExp; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - logic [`QLEN-1:0] QMux, QMMux; + logic [`QLEN-1:0] QMMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -72,9 +73,11 @@ module srtradix4( // - otherwise load WSA into the flipflop // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); + mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); @@ -88,10 +91,10 @@ module srtradix4( genvar i; generate - for(i=0; i<`DIVCOPIES; i++) begin + for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); - if(i<3) begin + if(i<(`DIVCOPIES-1)) begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; assign Q[i+1] = QNext[i]; @@ -101,9 +104,8 @@ module srtradix4( endgenerate // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux); mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); - flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage + flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flop #(`QLEN) QMreg(clk, QMMux, QM[0]); assign Quot = Q[0]; @@ -181,7 +183,7 @@ module qsel4 ( logic [3:0] QSel4[1023:0]; - initial begin + always_comb begin integer d, w, i, w2; for(d=0; d<8; d++) for(w=0; w<128; w++)begin @@ -270,9 +272,9 @@ module otfc4 ( // else if q = -2 Q = {QM, 10} QM = {QM, 01} // *** how does the 0 concatination numbers work? + assign QR = Q[`QLEN-3:0]; + assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM always_comb begin - QR = Q[`QLEN-3:0]; - QMR = QM[`QLEN-3:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; @@ -352,5 +354,5 @@ module expcalc( endcase end // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index fc73cf710..21e35c365 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -33,7 +33,7 @@ module srtfsm( input logic clk, input logic reset, - input logic [`DIVLEN+3:0] WSN, WCN, WS, WC, + input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, @@ -58,8 +58,8 @@ module srtfsm( //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); - assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0}); - assign DivStickyE = ~WZero; + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); + assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index fa76c0511..7386332f8 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -63,8 +63,7 @@ module srtpreproc ( assign X = PreprocX; assign Dpreproc = PreprocY; - - assign Dur = (`DURLEN)'($rtoi(`FPDUR)); + assign Dur = (`DURLEN)'(`FPDUR); // assign intExp = zeroCntB - zeroCntA + 1; // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv index 9f6e59811..71aabbc61 100644 --- a/pipelined/src/generic/lzc.sv +++ b/pipelined/src/generic/lzc.sv @@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) ( /* verilator lint_off CMPCONST */ /* verilator lint_off WIDTH */ - int i; + logic [31:0] i; always_comb begin i = 0; while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index ba14499e0..2aec1ab15 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -87,8 +87,8 @@ module testbenchfp; logic reset = 1'b0; logic [`DIVLEN-1:0] DivX; logic [`DIVLEN-1:0] Dpreproc; - logic [`DIVLEN+3:0] WSN, WS; - logic [`DIVLEN+3:0] WCN, WC; + logic [`DIVLEN+3:0] NextWSN, WS; + logic [`DIVLEN+3:0] NextWCN, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DURLEN-1:0] Dur; @@ -696,9 +696,9 @@ module testbenchfp; .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 9f2b46478..251522dc8 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets - redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" } -redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 } +redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 } From b45b3baec2023ff93ceebd1e7af53aab2eaa4f48 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 09:41:35 -0700 Subject: [PATCH 016/138] removed the +1 in the cvt --- pipelined/src/fpu/fcvt.sv | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 2d9fc21cb..4820cf284 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -68,7 +68,8 @@ module fcvt ( logic Signed; // is the opperation with a signed integer? logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? - logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC + logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder) + logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC // seperate OpCtrl for code readability @@ -102,10 +103,11 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} : - {Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}}; + assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + {Xm, {`CVTLEN-`NF{1'b0}}}; + assign LzcIn = LzcInFull[`CVTLEN-1:0]; - lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros)); + lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros)); /////////////////////////////////////////////////////////////////////////// // shifter @@ -119,13 +121,13 @@ module fcvt ( // denormalized/undeflowed result fp -> fp: // - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0 // ??? -> fp: - // - shift left by LeadingZeros+1 - to shift till the result is normalized + // - shift left by LeadingZeros - to shift till the result is normalized // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}}; + (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}}; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -197,14 +199,14 @@ module fcvt ( // | 0's | Mantissa | 0's if nessisary | // | keep | // - // - if the input is denormalized then we dont shift... so the "- (LeadingZeros+1)" is just leftovers from other options - // int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros + // - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options + // int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros // Process: // - shifted right by XLEN (XLEN) - // - shift left to normilize (-1-LeadingZeros) + // - shift left to normilize (-LeadingZeros) // - newBias to make the biased exponent - // oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp) - assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})}; + // oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp) + assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion From 6e96ca2c9b5e46ea2b56da1d0e98f854f77de103 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 17:47:27 +0000 Subject: [PATCH 017/138] Added adder input selection to on the fly converter --- pipelined/srt/srt.sv | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index b3de448f7..f04aa718f 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -87,6 +87,7 @@ module srt ( // Divisor Selection logic assign Db = ~D; mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); + fsel2 fsel(qp, qn, ) // Partial Product Generation csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); @@ -95,7 +96,7 @@ module srt ( otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); // otherwise use sotfc // creg sotfcC(clk, Start, C); - // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); + // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -263,6 +264,7 @@ module sotfc2( input logic sp, sn, input logic [`DIVLEN+3:0] C, output logic [`DIVLEN-1:0] Sq, + output logic [`DIVLEN+3:0] F, ); @@ -288,6 +290,8 @@ module sotfc2( end assign Sq = S[`DIVLEN-1:0]; + fsel2 fsel(sp, sn, C, S, SM, F); + endmodule ////////////////////////// From 120994b42bc6b5e238d0b0e4e1e8801a50c5772e Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 17:56:23 +0000 Subject: [PATCH 018/138] Finalized sqrt, ready for debugging --- pipelined/srt/srt.sv | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index f04aa718f..238692256 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -57,7 +57,7 @@ module srt ( logic qp, qz, qm; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; - logic [`DIVLEN+3:0] X, Dpreproc, C; + logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic intSign; @@ -87,17 +87,19 @@ module srt ( // Divisor Selection logic assign Db = ~D; mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); - fsel2 fsel(qp, qn, ) + + // If only implementing division, use divide otfc + // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // otherwise use sotfc + creg sotfcC(clk, Start, C); + sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); + + // Adder input selection + assign AddIn = Sqrt ? F : Dsel; // Partial Product Generation - csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); - // If only implementing division, use divide otfc - otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); - // otherwise use sotfc - // creg sotfcC(clk, Start, C); - // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); - expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); signcalc signcalc(.XSign, .YSign, .calcSign); From 3ed6b8d1ff1e37be32ded73406b47494cee6abd8 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 18:11:13 +0000 Subject: [PATCH 019/138] Test generation files in common format --- pipelined/srt/exptestgen.c | 4 ++++ pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/sqrttestgen.c | 17 +++++++++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c index 61fe74aa4..d6bebb774 100644 --- a/pipelined/srt/exptestgen.c +++ b/pipelined/srt/exptestgen.c @@ -96,6 +96,10 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, // Print r in standard double format fprintf(fptr, "%03x", rExp|(rSign<<11)); printhex(fptr, rFrac); + fprintf(fptr, "_"); + + // Spacing for testbench, value doesn't matter + fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); } diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen index dadc5dc5c8d36055ee1c68c8296279c302aa4efb..06615165395cb6abb37c9d60152d5fe70f15e11a 100755 GIT binary patch delta 1496 zcmZwFUu+ab7y$5@y+8D>dwYBQyH=zeO}P{r=pj&o!QPT~+YM1tlqL57$b-gGeJCc{ zKAL_2l*X%rF5edal=QY zZ}9UV%d5MF@h%wydfw>2HvS^M=PLiUZPD70h+A4~H!Mqoqq|MHAFeT6moMdDNNru-Uz9)6K`SB*ZJ=lh4Y z-J{RDd)}Jg^7esL*q|9-9t_6DLn3EhFRuv?8V(|C8Is9$(?{e7e7_mbRP9xeqw z^TjsTb+UDxjKKlr_40q>W}*BUKQkoC!}ycwqxhSL-;~p96LYpbh-)d!+jhIy4Av*G zUch<+>oxF6FzhSA&#iIBI^mmO&4#l$rWX5H;}TWHDu(|n*oNP_P^$3Ri2t!!SgJ;O z9qdroEZdHUJSDn!DQd6O5b#PlcvXFlPr#RIGtXH(^sx-ns}0Fetfwp9Tj4YCKtySQ zTU#3Z*?=Qmu`0IS;EUd-z(~yox!Ao82SInus-Pr>3W=gSLH`kh+AD8c2ezN^`t)KD z=2`W)pt`=w*Fya!+=?%QNxs}VR=<;T1G&N#WC^UpA80GxAr?_BN!#S&S=ihVmN4n= zhC7J0jp3(?sSC@&2giHOq`=||i|r9=$8EMXdO3dyj_Mse0}u3CNmJm7ragQLjx@#b zlwhPOReqQjR~#SpG8STDRmYpdn0u)CRQYXLD|Fwapbm^#$^z`1waXZm`OENNU#-7* m+Bk2eSriQRngYvTg delta 1453 zcmYk+Uu;uV7y$5dZr9m%_x9fFI@?ZRby*g0VMAt|@z2`1b-kG)CUVsn6nz2~6E7xa zJWN(Ige}Bv_&FRt5O`5p)DWs>zH}H8I(@*A5Y&*EzEryeO+!*CI(Pg|@7J-D_I~$w z&Uel|=iJjyT;LNI_{_53Fj=)2Cw^Tt9o>baC#p9NoZ~k_W#<3x+OhUvHu24e9~XOu z%Aqe$XXcdGyuOl_U{>z?G_2k8^Vl&jW4Cnk{e%$WmTq{*$QF5o{5<(B^2_9XFCSHo}5L3uOI~ zv7>eFau?Rz<*t!0lK&yEwlDvgHMc$6iCDn@XFqVx36Kt^=@_{XUJNO%3<%eNk!!~8f11B5a z;Wnu1rk%U+ELq2s6>IYH;$FNjw*D)`al0hzvNjH2P39Klc`HV-0e3i=chhsl(1xx7 znehi)QQzeA@Pszh_R7YtJN^-3Jh@so{_bk`;g4p1xMW+Ch2kij)7t%=ddm?xK7(Z~ z65egZm~DNqO~x0V-CD55Z~BU z&fgYd4hM58LO2-DV3fnZct?nf7)Mrx$YWIh6M}KZZeWaIH1ItoFs7l&=-?ITGaBy; zVP8C2)zXI70sI=bVc2NrQ*gp)X`IF#nU&3bLF1^>uhlCvaLL%om%ubr-V#2_n|Z^W zR>3j%KxvbuRJ6?Fo2Q!m75r&d3ta8)52Q(DEwlNNANjfq8q9Y*q}G}XmyGt!qs>q51S`9{0<{(6xUKf1rjN$;1>H{j;eg%cuPxkx Pvvv>6*+!;l$2|W7ZMD|( diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index b4ece1473..710fc32f3 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -33,11 +33,7 @@ void main(void) double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, 1.75, 1.875, 1.99999, 1.1, 1.2, 1.01, 1.001, 1.0001, -<<<<<<< Updated upstream - 1/1.1, 1/1.5, 1/1.25, 1/1.125}; -======= 2/1.1, 2/1.5, 2/1.25, 2/1.125}; ->>>>>>> Stashed changes double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; int i; @@ -69,14 +65,23 @@ void main(void) void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac) { + // Print a in standard double format fprintf(fptr, "%03x", aExp); printhex(fptr, aFrac); fprintf(fptr, "_"); + + // Spacing for testbench, value doesn't matter + fprintf(fptr, "%016x", 0); + fprintf(fptr, "_"); + + // Print r in standard double format fprintf(fptr, "%03x", rExp); printhex(fptr, rFrac); + fprintf(fptr, "_"); + + // Spacing for testbench, value doesn't matter + fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); - - } void printhex(FILE *fptr, double m) From 5975d0d470dc0fa71c7d7e4e754cd953d108cde1 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 18:30:18 +0000 Subject: [PATCH 020/138] Testbench accepts standard test vector files --- pipelined/srt/testbench.sv | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 1f6f15616..537fbb3e1 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -55,21 +55,11 @@ module testbench; parameter MEM_SIZE = 40000; parameter MEM_WIDTH = 64+64+64+64; - // INT TEST SIZES - // `define memrem 63:0 - // `define memr 127:64 - // `define memb 191:128 - // `define mema 255:192 - - // FLOAT TEST SIZES - // `define memr 63:0 - // `define memb 127:64 - // `define mema 191:128 - - // SQRT TEST SIZES - `define memr 63:0 - `define mema 127:64 + // Test sizes + `define memrem 63:0 + `define memr 127:64 `define memb 191:128 + `define mema 255:192 // Test logicisters logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file From b1906399aae6516a9019b2f4144e29fc139cc01e Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 19:34:04 +0000 Subject: [PATCH 021/138] Lint error fixed and added comments to preprocessing --- pipelined/srt/srt.sv | 62 +++++++++++++++++++++++--------------- pipelined/srt/testbench.sv | 2 +- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 238692256..3e41c16c7 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -54,13 +54,14 @@ module srt ( output logic [3:0] Flags ); - logic qp, qz, qm; // quotient is +1, 0, or -1 + logic qp, qz, qn; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic intSign; + logic cin; srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); @@ -76,29 +77,30 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); flopen #(7) durflop(clk, Start, calcDur, dur); - counter divcounter(clk, Start, dur, done); + srtcounter divcounter(clk, Start, dur, done); // Divisor Selection logic assign Db = ~D; - mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); + mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel); // If only implementing division, use divide otfc - // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot); // otherwise use sotfc - creg sotfcC(clk, Start, C); - sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); + creg sotfcC(clk, Start, C); + sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F); // Adder input selection assign AddIn = Sqrt ? F : Dsel; // Partial Product Generation - csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); + assign cin = ~Sqrt & qp; + csa #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -128,30 +130,40 @@ module srtpreproc ( logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; logic [`XLEN-1:0] PosA, PosB; - logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX; + logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX; + logic [`NF+4:0] SqrtX; + // Generate positive integer inputs if they are signed assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; + // Calculate leading zeros of integer inputs lzc #(`XLEN) lzcA (PosA, zeroCntA); lzc #(`XLEN) lzcB (PosB, zeroCntB); + // Make integers have DIVLEN bits assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}}; assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}}; + // Shift integers to have leading ones assign PreprocA = ExtraA << (zeroCntA + 1); assign PreprocB = ExtraB << (zeroCntB + 1); + + // Make mantissas have DIVLEN bits assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}}; assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}}; + // Selecting correct divider inputs assign DivX = Int ? PreprocA : PreprocX; assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; - assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; + + // Integer exponent and sign calculations assign intExp = zeroCntB - zeroCntA + 1; assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + // Number of cycles of divider assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2); endmodule @@ -160,7 +172,7 @@ endmodule ///////////////////////////////// module qsel2 ( // *** eventually just change to 4 bits input logic [`DIVLEN+3:`DIVLEN] ps, pc, - output logic qp, qz, qm + output logic qp, qz, qn ); logic [`DIVLEN+3:`DIVLEN] p, g; @@ -187,7 +199,7 @@ module qsel2 ( // *** eventually just change to 4 bits // Produce quotient = +1, 0, or -1 assign #1 qp = magnitude & ~sign; assign #1 qz = ~magnitude; - assign #1 qm = magnitude & sign; + assign #1 qn = magnitude & sign; endmodule //////////////////////////////////// @@ -198,15 +210,16 @@ module fsel2 ( input logic [`DIVLEN+3:0] C, S, SM, output logic [`DIVLEN+3:0] F ); - logic [`DIVLEN+3:0] FP, FN; + logic [`DIVLEN+3:0] FP, FN, FZ; // Generate for both positive and negative bits assign FP = ~S & C; assign FN = SM | (C & (~C << 2)); + assign FZ = {(`DIVLEN+4){1'B0}}; // Choose which adder input will be used - assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0}); + assign F = sp ? FP : (sn ? FN : FZ); endmodule @@ -216,7 +229,7 @@ endmodule module otfc2 #(parameter N=64) ( input logic clk, input logic Start, - input logic qp, qz, qm, + input logic qp, qz, qn, output logic [N-1:0] r ); @@ -236,7 +249,7 @@ module otfc2 #(parameter N=64) ( logic [N+1:0] QR, QMR; flopr #(N+3) Qreg(clk, Start, QNext, Q); - mux2 #(`DIVLEN+3) Qmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); + mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux); flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); always_comb begin @@ -248,7 +261,7 @@ module otfc2 #(parameter N=64) ( end else if (qz) begin QNext = {QR, 1'b0}; QMNext = {QMR, 1'b1}; - end else begin // If qp and qz are not true, then qm is + end else begin // If qp and qz are not true, then qn is QNext = {QMR, 1'b1}; QMNext = {QMR, 1'b0}; end @@ -266,7 +279,7 @@ module sotfc2( input logic sp, sn, input logic [`DIVLEN+3:0] C, output logic [`DIVLEN-1:0] Sq, - output logic [`DIVLEN+3:0] F, + output logic [`DIVLEN+3:0] F ); @@ -275,7 +288,7 @@ module sotfc2( logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) SMreg(clk, SMux, S); always_comb begin @@ -305,17 +318,18 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, (`DIVLEN-2){1'b0}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule ///////////// // counter // ///////////// -module counter(input logic clk, - input logic req, - input logic [$clog2(`XLEN+1)-1:0] dur, - output logic done); +module srtcounter(input logic clk, + input logic req, + input logic [$clog2(`XLEN+1)-1:0] dur, + output logic done +); logic [$clog2(`XLEN+1)-1:0] count; diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 537fbb3e1..b83e6b00e 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("testvectors", Tests); + $readmemh ("sqrttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From 97a1548356ed2147088038b0afb728b4c107871a Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 19:46:48 +0000 Subject: [PATCH 022/138] radix 4 files removed from srt and divlen modified for sqrt --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/srt/lint-srt | 1 - pipelined/srt/qslc_r4a2.c | 198 ----- pipelined/srt/qslc_r4a2b | Bin 16064 -> 0 bytes pipelined/srt/qslc_r4a2b.c | 190 ----- pipelined/srt/qslc_r4a2b.tv | 1024 ---------------------- pipelined/srt/qslc_sqrt_r4a2 | Bin 16152 -> 0 bytes pipelined/srt/qslc_sqrt_r4a2.c | 198 ----- pipelined/srt/qslc_sqrt_r4a2.sv | 1026 ----------------------- pipelined/srt/srt.sv | 4 +- 10 files changed, 3 insertions(+), 2640 deletions(-) delete mode 100644 pipelined/srt/qslc_r4a2.c delete mode 100755 pipelined/srt/qslc_r4a2b delete mode 100644 pipelined/srt/qslc_r4a2b.c delete mode 100644 pipelined/srt/qslc_r4a2b.tv delete mode 100755 pipelined/srt/qslc_sqrt_r4a2 delete mode 100644 pipelined/srt/qslc_sqrt_r4a2.c delete mode 100644 pipelined/srt/qslc_sqrt_r4a2.sv diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 54fa7a9bd..ad52be2e7 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -103,7 +103,7 @@ // division constants `define RADIX 32'h4 `define DIVCOPIES 32'h4 -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 1)) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) diff --git a/pipelined/srt/lint-srt b/pipelined/srt/lint-srt index fd42df887..399201be0 100755 --- a/pipelined/srt/lint-srt +++ b/pipelined/srt/lint-srt @@ -1,2 +1 @@ verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv -verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv diff --git a/pipelined/srt/qslc_r4a2.c b/pipelined/srt/qslc_r4a2.c deleted file mode 100644 index 8e68f9983..000000000 --- a/pipelined/srt/qslc_r4a2.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - Program: qslc_r4a2.c - Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) - User: James E. Stine - -*/ - -#include -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - printf("\tcase({D[5:3],Wmsbs})\n"); - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - printf("\t\t10'b"); - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - printf("_"); - disp_binary((double) pla.tot, TOT_SIZE, 0); - printf(": q = 4'b"); - - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 12) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -4) - printf("0000"); - else if ((pla.tot) >= -13) - printf("0010"); - else - printf("0001"); - break; - case 1: - if ((pla.tot) >= 14) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -15) - printf("0010"); - else - printf("0001"); - break; - case 2: - if ((pla.tot) >= 15) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -16) - printf("0010"); - else - printf("0001"); - break; - case 3: - if ((pla.tot) >= 16) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -18) - printf("0010"); - else - printf("0001"); - break; - case 4: - if ((pla.tot) >= 18) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 5: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 6: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -22) - printf("0010"); - else - printf("0001"); - break; - case 7: - if ((pla.tot) >= 24) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -24) - printf("0010"); - else - printf("0001"); - break; - default: printf ("XXX"); - - } - - printf(";\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - printf("\tendcase\n"); - -} diff --git a/pipelined/srt/qslc_r4a2b b/pipelined/srt/qslc_r4a2b deleted file mode 100755 index f719bbf471bfc1094ffe5bdd0d6441d7c2426e9d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16064 zcmeHOeQ;FO6~CLXB*BmkpF;d#p-TBMO9COq4>SZ87AFYApwem|o9rgpHrb848wm~{ zv8k05VXVy9I8*$n(-B*(`GfIeoJx=?GPM@Qahlp0jSkFg@B;`~SB<>>&b{}%ygUa;WcV^8M2bI&=ybMJZQ-uH6vzWa9gt5#;YT!JZE+$czDFI1WeHvE7KLR5$)Vm!_> z#Fb(!)MIc|XtR^|W+?;nT#d^HC%>v#9Zj>AU`)5wCv>T zbzscdZnftnXOLH(EnY(a5Ocd9h4MtPG>a#e4(Ba8FKwE$ABp7Gt^K;SA9KNY%{iZx z6FQdYd^&jvY(&hRevR7C$yX>h!JOJ4y7go-^j+Fpr~Lw6S&eY=8xWG`kIZ#H`ajpNc_jH168`dK)> zMaJryHAW~L4>v>-$#A@8&GM#LG+Yy`Z3+v+XlRZ_jYKjSPa1}h&=G+xvCRP2wFQm( zNHo|KxtDy=vvu0HIT(qGmeyolV=ykDZ>A0|r&66Nt*TmGx!hRjTX?PD;-WIExX`zl z;?6=Zxp4Qn@W*{c1muYy{Yv)c3FSSrl;|rp-cuc8OUTHlYvYIG8xEZAd`XrZ3fPHuKX&{@kwrAm-`tmdssGXE`Av50rTp;V4fL*@Qq{{=>O0(&4;8+i-dvx<17QcPoh@{ ziHCfbL*8JQOFv!RdHlzL&W{2eC(f*^@t5|L9u4eXO7l~5$|4}o*ZVv(elFvIej_x2 z-R^2)*QS$G5c*UqBlJA8&oiT)fQK~)ey+qXo<(f#g+S+-z@g7>3LHvj2V8Fij$cSl zMg=E*s)Agzuii5QakD>NU;9!ri`MxY0v$_Vqhg@*WHLXnYv~&hQhB)UR0yXx+<$`X zx(mL;{QEbACg@%;F;i0b#6P++1fYYDfFk#dA5&#sz8W(Pt75+_3rZbebHO* zIo&hhnb(8x9+Ad5Z`KDq(Vo;x7fchS?L8zOtM@F}(0k9Em#7ow`%iTG&wpHSumnP? z9k>qvdDkxg`Ro0C+fVgB3oX?O7k}!WYjgwt6KJ7qSPQqo6OpY+4S4R55vA!H-s#?mg6BndHCX-CIfM0`?o}ur@eiU_(SJKuDo6ZjS)PhAJm*Ny9M20q z9qAm;1J7cu(b*H&J^6kDVKj{yFT}1fS%I#=?kVYfs9eWx#9(^SeHts*Lw#fG52f>N zxMaKM;1b|dvk`ZH9CZNjwhfRCKp6qt1X!;C&)EQjTRkcZCk6p1QGf?+fO7!QFAS-} z1n>Z$GIfAN`h^EA-f6>+=aBXi-~%k(0Jz=;utJIx;8g`EvH`y0kk%7mp8|Y2f9R^u za!4fv*b0COX(oxy3V+mwFW`^_0jd;WzYSo8^ugBv%vXR1Yyc~yR|zmq0qSjlGaS-B z0`%iSp+ee2;(HL0gmN3+!yz>j;AI7vVgpzqRS}>|0nX1GN`e*Ad;)A%fOl+w&p4!U z1h@?V71GyVL0qNqPucJjIHdOoFk1nBW&^kZkYnj(0=N|*Xal6VcHRbjkB<#7k^Kp?5-Q9d}1O20!1-ql0i9eq1GdR#cWdXA;(G1s?a z_vF`srG(4_j-Hcw$MB@Q0z!vB?dsT;-r?EPhn~XoF_)BmIkJiz6gppn^4$03<2LWu zPIrom{y{hm=6Loz4yTu>?xTlsqKCAcLs+v0GG!=}flLN68OUTHlYvYI{$DdN^u|g> zSa8lXpTtpi-ZYQk_~GZK`6-U}f0^c_FR+ePw8L>d6|T#&ucvpm^e(9Pv}v}0=bkam zF7RpK-QY{WX&y84tZBXpe)|9x&frA!Zj{J%Z?$l>6}m3V&&hoV(>>B(j)Ur)O@I|S z@|D?DDAGJ@6ZEv$6$OPW3#Q%dxny&0ySS<7x_OJPnnh~zzY|9zaMF>KAkrsskiQq4 ziQ0M)$9lwCj`oGWpzs%2%g2w+E_NX#8OyU!H{cq6i>2j|{ZTjBGnC0dCIguaWHOM+ zKqdp33}iBp$v`FpnGAf_4Ddcb-q%M<5+d4LNW?PjB_zsM%-UFp*IJTROjCTimU%zn zWm@KyOOcj&Um>lBh}i!51v5r+H(tsR(GrNL3$Lb#crReDw$rcGM9(NDF40_vBc^DX znRXN1dS4)3dq|RNJd|kNM#OsyFIA#w)D5$KzYdJ!?$NkB&AI(6wahrYyph+3yh;_` zWlH%DT_2XeQs%g@Oq6n^w&PcSDf4yc>i=uZ+6V5au|zL!E9{G02j*g8ZiTY0TE6@` z@0<;_t^m$qNLS0yF`&FtWYxFFSNVbC8i1<=gKO?RurgyzVBGSQ{tys z`*2zRGzufdKiYL2ko8O!>t=EdLwuxqUX%Dsg|pxPBHbog`{%iq_a%Osb^oybJPH$# zhshO6qiKMR zP%u5F@r4fj?=}8$eR1+PBH*9b`iS12Eq_~r{uSscPG>zwp!dS=qz|E=mhZ-I+VGJs4D zgWjEJsx#tc!G*Pm$}m<|FI(d`{I{&2-Tf;z-m+}X>g7=5qX@$v(AL0;YGJIZT3flS z%2>N{<*oi2qh?uUmERC8O+nt&U%|WfX{-LXw*FgI?CHPg_XgomFo`;2n=KD~5FjrY zm6A<;PXJ-~hY3a~5i=Tt(GY#KuzD>}p-9wdO@u=NIl{@PO(e9h{1zb;NwgTX_*Mbw zGYow6z)7I5Ax8YfVfg0_L-En|QksSECAKstgSFtvxZ;gmibU~gMT_u7W67|uA=>I| ziN{*P@#GepsvhK!%%L<2p!=r`p;D~f5NSY{~3y$-?Q=jNt-2>%f!EjgXS#kKiYR&geBs6 zijssVN5Ssz{C!Cw=4}73Fi`vK zAMh$9^Ba_Cot^Ff6BfJwPOpM8f6MYz1s*@FK8XUhj>>zGbiA(Q?=78pmYJW0kH_ZE z^YQ(~RNK&aRnm~m{@=hrW0%)kJWuBDXE`z4jqJyH`~!UH`YnIG?(ALwC1{;u)|vg8 zkA<_{UsPyliQ?Q2SHONO(>$AW+&<3(3$?!!@9e(`x_u4jY6bhxwlu0hu@c!ij~!)E z=Owzr{W`EKpr$D;$HUK`Jm@%f<*Cknv>*dzIV$INm6hh=Bl~|OzpNCZ$l>4+#lHca C!tRp* diff --git a/pipelined/srt/qslc_r4a2b.c b/pipelined/srt/qslc_r4a2b.c deleted file mode 100644 index 94a3a4cd4..000000000 --- a/pipelined/srt/qslc_r4a2b.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - Program: qslc_r4a2.c - Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) - User: James E. Stine - -*/ - -#include -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 12) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -4) - printf("0"); - else if ((pla.tot) >= -13) - printf("2"); - else - printf("1"); - break; - case 1: - if ((pla.tot) >= 14) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -15) - printf("2"); - else - printf("1"); - break; - case 2: - if ((pla.tot) >= 15) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -16) - printf("2"); - else - printf("1"); - break; - case 3: - if ((pla.tot) >= 16) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -18) - printf("2"); - else - printf("1"); - break; - case 4: - if ((pla.tot) >= 18) - printf("8"); - else if ((pla.tot) >= 6) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -20) - printf("2"); - else - printf("1"); - break; - case 5: - if ((pla.tot) >= 20) - printf("8"); - else if ((pla.tot) >= 6) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -20) - printf("2"); - else - printf("1"); - break; - case 6: - if ((pla.tot) >= 20) - printf("8"); - else if ((pla.tot) >= 8) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -22) - printf("2"); - else - printf("1"); - break; - case 7: - if ((pla.tot) >= 24) - printf("8"); - else if ((pla.tot) >= 8) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -24) - printf("2"); - else - printf("1"); - break; - default: printf ("X"); - - } - - printf("\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - -} diff --git a/pipelined/srt/qslc_r4a2b.tv b/pipelined/srt/qslc_r4a2b.tv deleted file mode 100644 index b92d81e8e..000000000 --- a/pipelined/srt/qslc_r4a2b.tv +++ /dev/null @@ -1,1024 +0,0 @@ -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 deleted file mode 100755 index 5cff70cdf9d63dd415b92ba2ce9092b7da87695f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16152 zcmeHOdvsLA8J`WrB!X;$pnwYN1Lc(^;Z;B$0kUD^0i_5BeBNxbn`DnkcHQ0ZQbIJM zMEPpw*|r#-UxKii_>%XBh5Jh6HI0NSiV!SvF z<)Koi>_tkzs!~Trf0?cILm-JahB5=lbCp`K(#OMN-T^qOG#oLSRr{P{J(y82D|o952RpEJuH4Y_B?CQ!RvIA}~QzkDrS zmN@Bj7)3^-DziVbjna0)U$iKH%R&F^>xSLk_;$f#_cYy_^Vu}QP`imk7)s=iFFBPd zpM!%iDjv5Wr*tF_1b$mfaql zCCZoKxL^Riuu9jgwETfcpeh)R1tM!!E~^RE1=je=YXZWus%k@ZRy5{|#4Jle2Ly&g z8v(A^_4u{OC&QX3FvNRrCIGVqG4=(pm@g3wT5vLiF z$N96;K1BRb>*~h^F-$VVV+2Ox=MUfayYiI2O+Q~)U#9ihMas^4E_W6a zd=C&ZVd(XvSvu%8^jrrSdJR2|b56Upo_si12^lx^bpLSLZ|LQ7UmMyDy}Eyxk(TKQ zq$7}yKso~H2&5yBj==v%1dch!pYS&QE!*3Y`N!!(c(=!68Oe5U)63Z}$p9p0-wJh4 za$*yNbAljzHCcMP-c2TxJ7t+BWIbI6Qe~PL_H^w{m1#oQ)Achh!`oKrtyoBZDCgVC z6K&=5Y~|5rx$xuC<^$Jzo8R*`9qL}S##0zCe9haskmje!(Z}3E9ItdcC)|ccpo6Uo z$Aar^%^XQ$UT)Rw1ezqXQguj`o(;TPvb%|i~6Z+y-f>mrL4F4aBR4@W#R28B)U!|lLL%9CJ3D@yrMzDxGWglC7h~ z)A@NvrSnL<$2ldA<~=NpllE77optfVqo@$MhBy`5E1lEVc5Ilmn=ay1&!J||@sCFA zDL^4n23(WpxTD2${8CS6!-ri@Kucah@gz1((-n9Qp@!mqHI#sfmaRS-B~15a zaM-VTS^da4+&piyr8vB)@|&m0L+IWg;GWpid0vKSR7htt)0H4%g7S3M_DiRfy`SFmPn>dbOJhxi?(MONCy4ia zW-EAmJ5Q_J+neKjfw*si+fLke#iccm;(iVWYLA(XzC2ZXQ*V~@j>TBqG{?QI zqqc)gEslcIGBm^6=4~BaMSMs5B8>LuGe>}^t6y+-!&pEkkh%F))wve9@5+QXK;NI* z2hEbuFNPV=GJ^gIt1Uox8_?dH$?NY-=xl=iR6$z}=m!$|qzOHpptk`EANQGtqhG>s zgYr>o_&7(l>}nIbkDx^gI^KY)PRuf)_Y-uqf*zaF*Wb@%%SM>cFhM`WsuTU)Z$J-8 zXySAuG#-NP0Ti7$Xd0%G;bGguZnG&{YcRHlW94 z%Suh?;{=_kphFC3r-WW;LK_Ks0wWonaGHiHGQ0}|0eequuN2->2cK?ABfahnPK;0r*@6m*dR{j2Q6^(OR}1f2~iI&qn4cz_J&Djz2% z^>yMxc_oIK(2WE=iq{MJ`-TA>CZUJ%dq%3iD+szzK_4`rsuO!m==lV_UqLq;(6?nL z?lYmE^#V#iS)vo$O~YTvuul1yZTPrEwyea2K26YR3VON$Rh@8|&^rj~P|$ZS=?hJ_ zY}pBVB=Y0pT7tfXcL_rCtO4zi(1Rwlkf4tPicZ8$!x%E$t$c(HAF30#n9z?u2ee8- zJqFY%`+JQEeUYFG6m*;cRh^h^LhmJLo`N2^xUatlWha~_w1%J`s7LIVcuJ-MaU~gE zhXH|j-0(3~w(M6XbP_=yP|!vL>TEa*p`qXXQeMCM3@^VeaJk;Vb>B?5sEPYM;Z_4z zVBp@AxK$?ZNy1G5o8A|xTJ9z7C}`cL_7-TN8?SVU)hMEZHzuay)6}`GDel185pVCs zYY@5CQLmt16oW^AqgQQCJ6^WmN1@5n>uB2AyUlsm6S&@Ze{LsbXO?u4g-r8{P==+l zH>Z6^W11Rg)bVIbW`y)v**veZrZxqXnAz&WRa6oSa?afnD4sLwa_&Sxx#{i0#Sgo zK!#3*GFK?puU{`N6*+-AKRFWON5_&$|M6sUGM>+$;CKdajZJvpEyUwH0}~#dwwY}} zzQ}RTkW1-5V7zR^jw8t={cqU;*#?<_dcQUyiGVf?IxgT?|C)(61u>h}@D9V=i+J+nC)b7LS`h zW#&Z}kec|{;+PAZB$5(|^f4R-&=)~+qPp(Hu>@_MgZhPMMDFbw%g#7$NWKG2l5Kec z>UHSDaZD`w>>p*3J)P1KNJk(Yfpi4Y5lBZM9f5QN(h*2U;D0v)yf2UU;n9+T5-*Ur z+^p0Bf7Pg@7b9or;=yq$Ggg;*|KB;f%qt5G6TAWW#P z&<*2u$2AT=+snlDkJn|!;Xf7GKg3n4@aLQ?w`qS^pUN_~3(GiJZqRltVP%>7q22%2 zL~7r+&0~SS-bGrj(y~lT8~mRZvx^jD`Lbp6U6a<9*Vo1BU4@128SaAo!g=+wTsT>4 zzQ!Ak&!OUI(hi~P;DF^$9VnLuoC!G)o~}~{cDHuh`a?X@yYxO)6lI!;GwJ!h(P~kO zj6;kQxkXCD_Ydu^c8If5<6c>XxEUGp!}lTUcS!u`)IM6)KZ49a?H{aPyQQB|V%PcX zp-&FvXP?BME$rjq4M{sQwSS(yyeIKvQuh<jlO-wNfZa#51xxa4W!S9 z-lbghJGV$Zo;F4O)S`3{KUYJaE5@d-3$uKK;KQWP$P@frWBi6e@Xgx4y*>9p?_#nc zAA&wFW31r&fK5-6o+6=19JjsD=OUhCiWsTnx1b*hOI(jz`5O~V`!t@%A+z4p_;+-$ z)8kzJb_D(V&{I3@evUxzVyA}8!Gr4pqs-ZhpdXomw}MJYV6{2r{y%npqDrkp&6i9!_s;UW<`)VwIEEI`azWPm~B2*i$ z3B&?^cfr62+F~EHe36K6vlXa|MK+7dh_5za`Ri+IH-lo7EFfe3pf^TquqXdUZ0C>6 z^36cYEURSol9e9Ib9piC@GrjV@+B)vmqCpWDJ+jyTfN1rg|&Re6-$?_u&yX6xze-7 zTC-&73XdhiH9p?cU&OohX^Z|pZ2wPLv6uhrzB!<;1X5V(Dd6|TP)TTGpL(8OYCpVjN%>GCPcb-; zs(iIUI8$_lTevagYcZ1f7Wz_i{7wbCGrQ-ZlCB-e>7zpq6(2I_U9NL5SPW^G}vsKhHny?e~FC zb5G{;{FG(4u9wzYA7;7{1)4iDpXZA#d0xxyXB^80 z@M$i}WuA|+T%{}H7<2o%4XwyfOqtK~W0r9Z1ulps~yAI-Y;?cexxGy8kFKkK_0_bkzS8U$1X>O((PvW%As2Vm`~$ zKsNcJNMj3>I7&h;{urp*KK=hKS~X`GE -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - printf("\tcase({D[5:3],Wmsbs})\n"); - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - printf("\t\t11'b"); - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - printf("_"); - disp_binary((double) pla.tot, TOT_SIZE, 0); - printf(": q = 4'b"); - - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 24) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -26) - printf("0010"); - else - printf("0001"); - break; - case 1: - if ((pla.tot) >= 28) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -10) - printf("0000"); - else if ((pla.tot) >= -28) - printf("0010"); - else - printf("0001"); - break; - case 2: - if ((pla.tot) >= 32) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -32) - printf("0010"); - else - printf("0001"); - break; - case 3: - if ((pla.tot) >= 32) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -34) - printf("0010"); - else - printf("0001"); - break; - case 4: - if ((pla.tot) >= 36) - printf("1000"); - else if ((pla.tot) >= 12) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -36) - printf("0010"); - else - printf("0001"); - break; - case 5: - if ((pla.tot) >= 40) - printf("1000"); - else if ((pla.tot) >= 12) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -40) - printf("0010"); - else - printf("0001"); - break; - case 6: - if ((pla.tot) >= 40) - printf("1000"); - else if ((pla.tot) >= 16) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -44) - printf("0010"); - else - printf("0001"); - break; - case 7: - if ((pla.tot) >= 44) - printf("1000"); - else if ((pla.tot) >= 16) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -46) - printf("0010"); - else - printf("0001"); - break; - default: printf ("XXX"); - - } - - printf(";\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - printf("\tendcase\n"); - -} diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv deleted file mode 100644 index 805dbbaeb..000000000 --- a/pipelined/srt/qslc_sqrt_r4a2.sv +++ /dev/null @@ -1,1026 +0,0 @@ - case({D[5:3],Wmsbs}) - 11'b000_0000000: q = 4'b0000; - 11'b000_0000001: q = 4'b0000; - 11'b000_0000010: q = 4'b0000; - 11'b000_0000011: q = 4'b0000; - 11'b000_0000100: q = 4'b0000; - 11'b000_0000101: q = 4'b0000; - 11'b000_0000110: q = 4'b0000; - 11'b000_0000111: q = 4'b0000; - 11'b000_0001000: q = 4'b0100; - 11'b000_0001001: q = 4'b0100; - 11'b000_0001010: q = 4'b0100; - 11'b000_0001011: q = 4'b0100; - 11'b000_0001100: q = 4'b0100; - 11'b000_0001101: q = 4'b0100; - 11'b000_0001110: q = 4'b0100; - 11'b000_0001111: q = 4'b0100; - 11'b000_0010000: q = 4'b0100; - 11'b000_0010001: q = 4'b0100; - 11'b000_0010010: q = 4'b0100; - 11'b000_0010011: q = 4'b0100; - 11'b000_0010100: q = 4'b0100; - 11'b000_0010101: q = 4'b0100; - 11'b000_0010110: q = 4'b0100; - 11'b000_0010111: q = 4'b0100; - 11'b000_0011000: q = 4'b1000; - 11'b000_0011001: q = 4'b1000; - 11'b000_0011010: q = 4'b1000; - 11'b000_0011011: q = 4'b1000; - 11'b000_0011100: q = 4'b1000; - 11'b000_0011101: q = 4'b1000; - 11'b000_0011110: q = 4'b1000; - 11'b000_0011111: q = 4'b1000; - 11'b000_0100000: q = 4'b1000; - 11'b000_0100001: q = 4'b1000; - 11'b000_0100010: q = 4'b1000; - 11'b000_0100011: q = 4'b1000; - 11'b000_0100100: q = 4'b1000; - 11'b000_0100101: q = 4'b1000; - 11'b000_0100110: q = 4'b1000; - 11'b000_0100111: q = 4'b1000; - 11'b000_0101000: q = 4'b1000; - 11'b000_0101001: q = 4'b1000; - 11'b000_0101010: q = 4'b1000; - 11'b000_0101011: q = 4'b1000; - 11'b000_0101100: q = 4'b1000; - 11'b000_0101101: q = 4'b1000; - 11'b000_0101110: q = 4'b1000; - 11'b000_0101111: q = 4'b1000; - 11'b000_0110000: q = 4'b1000; - 11'b000_0110001: q = 4'b1000; - 11'b000_0110010: q = 4'b1000; - 11'b000_0110011: q = 4'b1000; - 11'b000_0110100: q = 4'b1000; - 11'b000_0110101: q = 4'b1000; - 11'b000_0110110: q = 4'b1000; - 11'b000_0110111: q = 4'b1000; - 11'b000_0111000: q = 4'b1000; - 11'b000_0111001: q = 4'b1000; - 11'b000_0111010: q = 4'b1000; - 11'b000_0111011: q = 4'b1000; - 11'b000_0111100: q = 4'b1000; - 11'b000_0111101: q = 4'b1000; - 11'b000_0111110: q = 4'b1000; - 11'b000_0111111: q = 4'b1000; - 11'b000_1000000: q = 4'b0001; - 11'b000_1000001: q = 4'b0001; - 11'b000_1000010: q = 4'b0001; - 11'b000_1000011: q = 4'b0001; - 11'b000_1000100: q = 4'b0001; - 11'b000_1000101: q = 4'b0001; - 11'b000_1000110: q = 4'b0001; - 11'b000_1000111: q = 4'b0001; - 11'b000_1001000: q = 4'b0001; - 11'b000_1001001: q = 4'b0001; - 11'b000_1001010: q = 4'b0001; - 11'b000_1001011: q = 4'b0001; - 11'b000_1001100: q = 4'b0001; - 11'b000_1001101: q = 4'b0001; - 11'b000_1001110: q = 4'b0001; - 11'b000_1001111: q = 4'b0001; - 11'b000_1010000: q = 4'b0001; - 11'b000_1010001: q = 4'b0001; - 11'b000_1010010: q = 4'b0001; - 11'b000_1010011: q = 4'b0001; - 11'b000_1010100: q = 4'b0001; - 11'b000_1010101: q = 4'b0001; - 11'b000_1010110: q = 4'b0001; - 11'b000_1010111: q = 4'b0001; - 11'b000_1011000: q = 4'b0001; - 11'b000_1011001: q = 4'b0001; - 11'b000_1011010: q = 4'b0001; - 11'b000_1011011: q = 4'b0001; - 11'b000_1011100: q = 4'b0001; - 11'b000_1011101: q = 4'b0001; - 11'b000_1011110: q = 4'b0001; - 11'b000_1011111: q = 4'b0001; - 11'b000_1100000: q = 4'b0001; - 11'b000_1100001: q = 4'b0001; - 11'b000_1100010: q = 4'b0001; - 11'b000_1100011: q = 4'b0001; - 11'b000_1100100: q = 4'b0001; - 11'b000_1100101: q = 4'b0001; - 11'b000_1100110: q = 4'b0010; - 11'b000_1100111: q = 4'b0010; - 11'b000_1101000: q = 4'b0010; - 11'b000_1101001: q = 4'b0010; - 11'b000_1101010: q = 4'b0010; - 11'b000_1101011: q = 4'b0010; - 11'b000_1101100: q = 4'b0010; - 11'b000_1101101: q = 4'b0010; - 11'b000_1101110: q = 4'b0010; - 11'b000_1101111: q = 4'b0010; - 11'b000_1110000: q = 4'b0010; - 11'b000_1110001: q = 4'b0010; - 11'b000_1110010: q = 4'b0010; - 11'b000_1110011: q = 4'b0010; - 11'b000_1110100: q = 4'b0010; - 11'b000_1110101: q = 4'b0010; - 11'b000_1110110: q = 4'b0010; - 11'b000_1110111: q = 4'b0010; - 11'b000_1111000: q = 4'b0000; - 11'b000_1111001: q = 4'b0000; - 11'b000_1111010: q = 4'b0000; - 11'b000_1111011: q = 4'b0000; - 11'b000_1111100: q = 4'b0000; - 11'b000_1111101: q = 4'b0000; - 11'b000_1111110: q = 4'b0000; - 11'b000_1111111: q = 4'b0000; - 11'b001_0000000: q = 4'b0000; - 11'b001_0000001: q = 4'b0000; - 11'b001_0000010: q = 4'b0000; - 11'b001_0000011: q = 4'b0000; - 11'b001_0000100: q = 4'b0000; - 11'b001_0000101: q = 4'b0000; - 11'b001_0000110: q = 4'b0000; - 11'b001_0000111: q = 4'b0000; - 11'b001_0001000: q = 4'b0100; - 11'b001_0001001: q = 4'b0100; - 11'b001_0001010: q = 4'b0100; - 11'b001_0001011: q = 4'b0100; - 11'b001_0001100: q = 4'b0100; - 11'b001_0001101: q = 4'b0100; - 11'b001_0001110: q = 4'b0100; - 11'b001_0001111: q = 4'b0100; - 11'b001_0010000: q = 4'b0100; - 11'b001_0010001: q = 4'b0100; - 11'b001_0010010: q = 4'b0100; - 11'b001_0010011: q = 4'b0100; - 11'b001_0010100: q = 4'b0100; - 11'b001_0010101: q = 4'b0100; - 11'b001_0010110: q = 4'b0100; - 11'b001_0010111: q = 4'b0100; - 11'b001_0011000: q = 4'b0100; - 11'b001_0011001: q = 4'b0100; - 11'b001_0011010: q = 4'b0100; - 11'b001_0011011: q = 4'b0100; - 11'b001_0011100: q = 4'b1000; - 11'b001_0011101: q = 4'b1000; - 11'b001_0011110: q = 4'b1000; - 11'b001_0011111: q = 4'b1000; - 11'b001_0100000: q = 4'b1000; - 11'b001_0100001: q = 4'b1000; - 11'b001_0100010: q = 4'b1000; - 11'b001_0100011: q = 4'b1000; - 11'b001_0100100: q = 4'b1000; - 11'b001_0100101: q = 4'b1000; - 11'b001_0100110: q = 4'b1000; - 11'b001_0100111: q = 4'b1000; - 11'b001_0101000: q = 4'b1000; - 11'b001_0101001: q = 4'b1000; - 11'b001_0101010: q = 4'b1000; - 11'b001_0101011: q = 4'b1000; - 11'b001_0101100: q = 4'b1000; - 11'b001_0101101: q = 4'b1000; - 11'b001_0101110: q = 4'b1000; - 11'b001_0101111: q = 4'b1000; - 11'b001_0110000: q = 4'b1000; - 11'b001_0110001: q = 4'b1000; - 11'b001_0110010: q = 4'b1000; - 11'b001_0110011: q = 4'b1000; - 11'b001_0110100: q = 4'b1000; - 11'b001_0110101: q = 4'b1000; - 11'b001_0110110: q = 4'b1000; - 11'b001_0110111: q = 4'b1000; - 11'b001_0111000: q = 4'b1000; - 11'b001_0111001: q = 4'b1000; - 11'b001_0111010: q = 4'b1000; - 11'b001_0111011: q = 4'b1000; - 11'b001_0111100: q = 4'b1000; - 11'b001_0111101: q = 4'b1000; - 11'b001_0111110: q = 4'b1000; - 11'b001_0111111: q = 4'b1000; - 11'b001_1000000: q = 4'b0001; - 11'b001_1000001: q = 4'b0001; - 11'b001_1000010: q = 4'b0001; - 11'b001_1000011: q = 4'b0001; - 11'b001_1000100: q = 4'b0001; - 11'b001_1000101: q = 4'b0001; - 11'b001_1000110: q = 4'b0001; - 11'b001_1000111: q = 4'b0001; - 11'b001_1001000: q = 4'b0001; - 11'b001_1001001: q = 4'b0001; - 11'b001_1001010: q = 4'b0001; - 11'b001_1001011: q = 4'b0001; - 11'b001_1001100: q = 4'b0001; - 11'b001_1001101: q = 4'b0001; - 11'b001_1001110: q = 4'b0001; - 11'b001_1001111: q = 4'b0001; - 11'b001_1010000: q = 4'b0001; - 11'b001_1010001: q = 4'b0001; - 11'b001_1010010: q = 4'b0001; - 11'b001_1010011: q = 4'b0001; - 11'b001_1010100: q = 4'b0001; - 11'b001_1010101: q = 4'b0001; - 11'b001_1010110: q = 4'b0001; - 11'b001_1010111: q = 4'b0001; - 11'b001_1011000: q = 4'b0001; - 11'b001_1011001: q = 4'b0001; - 11'b001_1011010: q = 4'b0001; - 11'b001_1011011: q = 4'b0001; - 11'b001_1011100: q = 4'b0001; - 11'b001_1011101: q = 4'b0001; - 11'b001_1011110: q = 4'b0001; - 11'b001_1011111: q = 4'b0001; - 11'b001_1100000: q = 4'b0001; - 11'b001_1100001: q = 4'b0001; - 11'b001_1100010: q = 4'b0001; - 11'b001_1100011: q = 4'b0001; - 11'b001_1100100: q = 4'b0010; - 11'b001_1100101: q = 4'b0010; - 11'b001_1100110: q = 4'b0010; - 11'b001_1100111: q = 4'b0010; - 11'b001_1101000: q = 4'b0010; - 11'b001_1101001: q = 4'b0010; - 11'b001_1101010: q = 4'b0010; - 11'b001_1101011: q = 4'b0010; - 11'b001_1101100: q = 4'b0010; - 11'b001_1101101: q = 4'b0010; - 11'b001_1101110: q = 4'b0010; - 11'b001_1101111: q = 4'b0010; - 11'b001_1110000: q = 4'b0010; - 11'b001_1110001: q = 4'b0010; - 11'b001_1110010: q = 4'b0010; - 11'b001_1110011: q = 4'b0010; - 11'b001_1110100: q = 4'b0010; - 11'b001_1110101: q = 4'b0010; - 11'b001_1110110: q = 4'b0000; - 11'b001_1110111: q = 4'b0000; - 11'b001_1111000: q = 4'b0000; - 11'b001_1111001: q = 4'b0000; - 11'b001_1111010: q = 4'b0000; - 11'b001_1111011: q = 4'b0000; - 11'b001_1111100: q = 4'b0000; - 11'b001_1111101: q = 4'b0000; - 11'b001_1111110: q = 4'b0000; - 11'b001_1111111: q = 4'b0000; - 11'b010_0000000: q = 4'b0000; - 11'b010_0000001: q = 4'b0000; - 11'b010_0000010: q = 4'b0000; - 11'b010_0000011: q = 4'b0000; - 11'b010_0000100: q = 4'b0000; - 11'b010_0000101: q = 4'b0000; - 11'b010_0000110: q = 4'b0000; - 11'b010_0000111: q = 4'b0000; - 11'b010_0001000: q = 4'b0100; - 11'b010_0001001: q = 4'b0100; - 11'b010_0001010: q = 4'b0100; - 11'b010_0001011: q = 4'b0100; - 11'b010_0001100: q = 4'b0100; - 11'b010_0001101: q = 4'b0100; - 11'b010_0001110: q = 4'b0100; - 11'b010_0001111: q = 4'b0100; - 11'b010_0010000: q = 4'b0100; - 11'b010_0010001: q = 4'b0100; - 11'b010_0010010: q = 4'b0100; - 11'b010_0010011: q = 4'b0100; - 11'b010_0010100: q = 4'b0100; - 11'b010_0010101: q = 4'b0100; - 11'b010_0010110: q = 4'b0100; - 11'b010_0010111: q = 4'b0100; - 11'b010_0011000: q = 4'b0100; - 11'b010_0011001: q = 4'b0100; - 11'b010_0011010: q = 4'b0100; - 11'b010_0011011: q = 4'b0100; - 11'b010_0011100: q = 4'b0100; - 11'b010_0011101: q = 4'b0100; - 11'b010_0011110: q = 4'b0100; - 11'b010_0011111: q = 4'b0100; - 11'b010_0100000: q = 4'b1000; - 11'b010_0100001: q = 4'b1000; - 11'b010_0100010: q = 4'b1000; - 11'b010_0100011: q = 4'b1000; - 11'b010_0100100: q = 4'b1000; - 11'b010_0100101: q = 4'b1000; - 11'b010_0100110: q = 4'b1000; - 11'b010_0100111: q = 4'b1000; - 11'b010_0101000: q = 4'b1000; - 11'b010_0101001: q = 4'b1000; - 11'b010_0101010: q = 4'b1000; - 11'b010_0101011: q = 4'b1000; - 11'b010_0101100: q = 4'b1000; - 11'b010_0101101: q = 4'b1000; - 11'b010_0101110: q = 4'b1000; - 11'b010_0101111: q = 4'b1000; - 11'b010_0110000: q = 4'b1000; - 11'b010_0110001: q = 4'b1000; - 11'b010_0110010: q = 4'b1000; - 11'b010_0110011: q = 4'b1000; - 11'b010_0110100: q = 4'b1000; - 11'b010_0110101: q = 4'b1000; - 11'b010_0110110: q = 4'b1000; - 11'b010_0110111: q = 4'b1000; - 11'b010_0111000: q = 4'b1000; - 11'b010_0111001: q = 4'b1000; - 11'b010_0111010: q = 4'b1000; - 11'b010_0111011: q = 4'b1000; - 11'b010_0111100: q = 4'b1000; - 11'b010_0111101: q = 4'b1000; - 11'b010_0111110: q = 4'b1000; - 11'b010_0111111: q = 4'b1000; - 11'b010_1000000: q = 4'b0001; - 11'b010_1000001: q = 4'b0001; - 11'b010_1000010: q = 4'b0001; - 11'b010_1000011: q = 4'b0001; - 11'b010_1000100: q = 4'b0001; - 11'b010_1000101: q = 4'b0001; - 11'b010_1000110: q = 4'b0001; - 11'b010_1000111: q = 4'b0001; - 11'b010_1001000: q = 4'b0001; - 11'b010_1001001: q = 4'b0001; - 11'b010_1001010: q = 4'b0001; - 11'b010_1001011: q = 4'b0001; - 11'b010_1001100: q = 4'b0001; - 11'b010_1001101: q = 4'b0001; - 11'b010_1001110: q = 4'b0001; - 11'b010_1001111: q = 4'b0001; - 11'b010_1010000: q = 4'b0001; - 11'b010_1010001: q = 4'b0001; - 11'b010_1010010: q = 4'b0001; - 11'b010_1010011: q = 4'b0001; - 11'b010_1010100: q = 4'b0001; - 11'b010_1010101: q = 4'b0001; - 11'b010_1010110: q = 4'b0001; - 11'b010_1010111: q = 4'b0001; - 11'b010_1011000: q = 4'b0001; - 11'b010_1011001: q = 4'b0001; - 11'b010_1011010: q = 4'b0001; - 11'b010_1011011: q = 4'b0001; - 11'b010_1011100: q = 4'b0001; - 11'b010_1011101: q = 4'b0001; - 11'b010_1011110: q = 4'b0001; - 11'b010_1011111: q = 4'b0001; - 11'b010_1100000: q = 4'b0010; - 11'b010_1100001: q = 4'b0010; - 11'b010_1100010: q = 4'b0010; - 11'b010_1100011: q = 4'b0010; - 11'b010_1100100: q = 4'b0010; - 11'b010_1100101: q = 4'b0010; - 11'b010_1100110: q = 4'b0010; - 11'b010_1100111: q = 4'b0010; - 11'b010_1101000: q = 4'b0010; - 11'b010_1101001: q = 4'b0010; - 11'b010_1101010: q = 4'b0010; - 11'b010_1101011: q = 4'b0010; - 11'b010_1101100: q = 4'b0010; - 11'b010_1101101: q = 4'b0010; - 11'b010_1101110: q = 4'b0010; - 11'b010_1101111: q = 4'b0010; - 11'b010_1110000: q = 4'b0010; - 11'b010_1110001: q = 4'b0010; - 11'b010_1110010: q = 4'b0010; - 11'b010_1110011: q = 4'b0010; - 11'b010_1110100: q = 4'b0000; - 11'b010_1110101: q = 4'b0000; - 11'b010_1110110: q = 4'b0000; - 11'b010_1110111: q = 4'b0000; - 11'b010_1111000: q = 4'b0000; - 11'b010_1111001: q = 4'b0000; - 11'b010_1111010: q = 4'b0000; - 11'b010_1111011: q = 4'b0000; - 11'b010_1111100: q = 4'b0000; - 11'b010_1111101: q = 4'b0000; - 11'b010_1111110: q = 4'b0000; - 11'b010_1111111: q = 4'b0000; - 11'b011_0000000: q = 4'b0000; - 11'b011_0000001: q = 4'b0000; - 11'b011_0000010: q = 4'b0000; - 11'b011_0000011: q = 4'b0000; - 11'b011_0000100: q = 4'b0000; - 11'b011_0000101: q = 4'b0000; - 11'b011_0000110: q = 4'b0000; - 11'b011_0000111: q = 4'b0000; - 11'b011_0001000: q = 4'b0100; - 11'b011_0001001: q = 4'b0100; - 11'b011_0001010: q = 4'b0100; - 11'b011_0001011: q = 4'b0100; - 11'b011_0001100: q = 4'b0100; - 11'b011_0001101: q = 4'b0100; - 11'b011_0001110: q = 4'b0100; - 11'b011_0001111: q = 4'b0100; - 11'b011_0010000: q = 4'b0100; - 11'b011_0010001: q = 4'b0100; - 11'b011_0010010: q = 4'b0100; - 11'b011_0010011: q = 4'b0100; - 11'b011_0010100: q = 4'b0100; - 11'b011_0010101: q = 4'b0100; - 11'b011_0010110: q = 4'b0100; - 11'b011_0010111: q = 4'b0100; - 11'b011_0011000: q = 4'b0100; - 11'b011_0011001: q = 4'b0100; - 11'b011_0011010: q = 4'b0100; - 11'b011_0011011: q = 4'b0100; - 11'b011_0011100: q = 4'b0100; - 11'b011_0011101: q = 4'b0100; - 11'b011_0011110: q = 4'b0100; - 11'b011_0011111: q = 4'b0100; - 11'b011_0100000: q = 4'b1000; - 11'b011_0100001: q = 4'b1000; - 11'b011_0100010: q = 4'b1000; - 11'b011_0100011: q = 4'b1000; - 11'b011_0100100: q = 4'b1000; - 11'b011_0100101: q = 4'b1000; - 11'b011_0100110: q = 4'b1000; - 11'b011_0100111: q = 4'b1000; - 11'b011_0101000: q = 4'b1000; - 11'b011_0101001: q = 4'b1000; - 11'b011_0101010: q = 4'b1000; - 11'b011_0101011: q = 4'b1000; - 11'b011_0101100: q = 4'b1000; - 11'b011_0101101: q = 4'b1000; - 11'b011_0101110: q = 4'b1000; - 11'b011_0101111: q = 4'b1000; - 11'b011_0110000: q = 4'b1000; - 11'b011_0110001: q = 4'b1000; - 11'b011_0110010: q = 4'b1000; - 11'b011_0110011: q = 4'b1000; - 11'b011_0110100: q = 4'b1000; - 11'b011_0110101: q = 4'b1000; - 11'b011_0110110: q = 4'b1000; - 11'b011_0110111: q = 4'b1000; - 11'b011_0111000: q = 4'b1000; - 11'b011_0111001: q = 4'b1000; - 11'b011_0111010: q = 4'b1000; - 11'b011_0111011: q = 4'b1000; - 11'b011_0111100: q = 4'b1000; - 11'b011_0111101: q = 4'b1000; - 11'b011_0111110: q = 4'b1000; - 11'b011_0111111: q = 4'b1000; - 11'b011_1000000: q = 4'b0001; - 11'b011_1000001: q = 4'b0001; - 11'b011_1000010: q = 4'b0001; - 11'b011_1000011: q = 4'b0001; - 11'b011_1000100: q = 4'b0001; - 11'b011_1000101: q = 4'b0001; - 11'b011_1000110: q = 4'b0001; - 11'b011_1000111: q = 4'b0001; - 11'b011_1001000: q = 4'b0001; - 11'b011_1001001: q = 4'b0001; - 11'b011_1001010: q = 4'b0001; - 11'b011_1001011: q = 4'b0001; - 11'b011_1001100: q = 4'b0001; - 11'b011_1001101: q = 4'b0001; - 11'b011_1001110: q = 4'b0001; - 11'b011_1001111: q = 4'b0001; - 11'b011_1010000: q = 4'b0001; - 11'b011_1010001: q = 4'b0001; - 11'b011_1010010: q = 4'b0001; - 11'b011_1010011: q = 4'b0001; - 11'b011_1010100: q = 4'b0001; - 11'b011_1010101: q = 4'b0001; - 11'b011_1010110: q = 4'b0001; - 11'b011_1010111: q = 4'b0001; - 11'b011_1011000: q = 4'b0001; - 11'b011_1011001: q = 4'b0001; - 11'b011_1011010: q = 4'b0001; - 11'b011_1011011: q = 4'b0001; - 11'b011_1011100: q = 4'b0001; - 11'b011_1011101: q = 4'b0001; - 11'b011_1011110: q = 4'b0010; - 11'b011_1011111: q = 4'b0010; - 11'b011_1100000: q = 4'b0010; - 11'b011_1100001: q = 4'b0010; - 11'b011_1100010: q = 4'b0010; - 11'b011_1100011: q = 4'b0010; - 11'b011_1100100: q = 4'b0010; - 11'b011_1100101: q = 4'b0010; - 11'b011_1100110: q = 4'b0010; - 11'b011_1100111: q = 4'b0010; - 11'b011_1101000: q = 4'b0010; - 11'b011_1101001: q = 4'b0010; - 11'b011_1101010: q = 4'b0010; - 11'b011_1101011: q = 4'b0010; - 11'b011_1101100: q = 4'b0010; - 11'b011_1101101: q = 4'b0010; - 11'b011_1101110: q = 4'b0010; - 11'b011_1101111: q = 4'b0010; - 11'b011_1110000: q = 4'b0010; - 11'b011_1110001: q = 4'b0010; - 11'b011_1110010: q = 4'b0010; - 11'b011_1110011: q = 4'b0010; - 11'b011_1110100: q = 4'b0000; - 11'b011_1110101: q = 4'b0000; - 11'b011_1110110: q = 4'b0000; - 11'b011_1110111: q = 4'b0000; - 11'b011_1111000: q = 4'b0000; - 11'b011_1111001: q = 4'b0000; - 11'b011_1111010: q = 4'b0000; - 11'b011_1111011: q = 4'b0000; - 11'b011_1111100: q = 4'b0000; - 11'b011_1111101: q = 4'b0000; - 11'b011_1111110: q = 4'b0000; - 11'b011_1111111: q = 4'b0000; - 11'b100_0000000: q = 4'b0000; - 11'b100_0000001: q = 4'b0000; - 11'b100_0000010: q = 4'b0000; - 11'b100_0000011: q = 4'b0000; - 11'b100_0000100: q = 4'b0000; - 11'b100_0000101: q = 4'b0000; - 11'b100_0000110: q = 4'b0000; - 11'b100_0000111: q = 4'b0000; - 11'b100_0001000: q = 4'b0000; - 11'b100_0001001: q = 4'b0000; - 11'b100_0001010: q = 4'b0000; - 11'b100_0001011: q = 4'b0000; - 11'b100_0001100: q = 4'b0100; - 11'b100_0001101: q = 4'b0100; - 11'b100_0001110: q = 4'b0100; - 11'b100_0001111: q = 4'b0100; - 11'b100_0010000: q = 4'b0100; - 11'b100_0010001: q = 4'b0100; - 11'b100_0010010: q = 4'b0100; - 11'b100_0010011: q = 4'b0100; - 11'b100_0010100: q = 4'b0100; - 11'b100_0010101: q = 4'b0100; - 11'b100_0010110: q = 4'b0100; - 11'b100_0010111: q = 4'b0100; - 11'b100_0011000: q = 4'b0100; - 11'b100_0011001: q = 4'b0100; - 11'b100_0011010: q = 4'b0100; - 11'b100_0011011: q = 4'b0100; - 11'b100_0011100: q = 4'b0100; - 11'b100_0011101: q = 4'b0100; - 11'b100_0011110: q = 4'b0100; - 11'b100_0011111: q = 4'b0100; - 11'b100_0100000: q = 4'b0100; - 11'b100_0100001: q = 4'b0100; - 11'b100_0100010: q = 4'b0100; - 11'b100_0100011: q = 4'b0100; - 11'b100_0100100: q = 4'b1000; - 11'b100_0100101: q = 4'b1000; - 11'b100_0100110: q = 4'b1000; - 11'b100_0100111: q = 4'b1000; - 11'b100_0101000: q = 4'b1000; - 11'b100_0101001: q = 4'b1000; - 11'b100_0101010: q = 4'b1000; - 11'b100_0101011: q = 4'b1000; - 11'b100_0101100: q = 4'b1000; - 11'b100_0101101: q = 4'b1000; - 11'b100_0101110: q = 4'b1000; - 11'b100_0101111: q = 4'b1000; - 11'b100_0110000: q = 4'b1000; - 11'b100_0110001: q = 4'b1000; - 11'b100_0110010: q = 4'b1000; - 11'b100_0110011: q = 4'b1000; - 11'b100_0110100: q = 4'b1000; - 11'b100_0110101: q = 4'b1000; - 11'b100_0110110: q = 4'b1000; - 11'b100_0110111: q = 4'b1000; - 11'b100_0111000: q = 4'b1000; - 11'b100_0111001: q = 4'b1000; - 11'b100_0111010: q = 4'b1000; - 11'b100_0111011: q = 4'b1000; - 11'b100_0111100: q = 4'b1000; - 11'b100_0111101: q = 4'b1000; - 11'b100_0111110: q = 4'b1000; - 11'b100_0111111: q = 4'b1000; - 11'b100_1000000: q = 4'b0001; - 11'b100_1000001: q = 4'b0001; - 11'b100_1000010: q = 4'b0001; - 11'b100_1000011: q = 4'b0001; - 11'b100_1000100: q = 4'b0001; - 11'b100_1000101: q = 4'b0001; - 11'b100_1000110: q = 4'b0001; - 11'b100_1000111: q = 4'b0001; - 11'b100_1001000: q = 4'b0001; - 11'b100_1001001: q = 4'b0001; - 11'b100_1001010: q = 4'b0001; - 11'b100_1001011: q = 4'b0001; - 11'b100_1001100: q = 4'b0001; - 11'b100_1001101: q = 4'b0001; - 11'b100_1001110: q = 4'b0001; - 11'b100_1001111: q = 4'b0001; - 11'b100_1010000: q = 4'b0001; - 11'b100_1010001: q = 4'b0001; - 11'b100_1010010: q = 4'b0001; - 11'b100_1010011: q = 4'b0001; - 11'b100_1010100: q = 4'b0001; - 11'b100_1010101: q = 4'b0001; - 11'b100_1010110: q = 4'b0001; - 11'b100_1010111: q = 4'b0001; - 11'b100_1011000: q = 4'b0001; - 11'b100_1011001: q = 4'b0001; - 11'b100_1011010: q = 4'b0001; - 11'b100_1011011: q = 4'b0001; - 11'b100_1011100: q = 4'b0010; - 11'b100_1011101: q = 4'b0010; - 11'b100_1011110: q = 4'b0010; - 11'b100_1011111: q = 4'b0010; - 11'b100_1100000: q = 4'b0010; - 11'b100_1100001: q = 4'b0010; - 11'b100_1100010: q = 4'b0010; - 11'b100_1100011: q = 4'b0010; - 11'b100_1100100: q = 4'b0010; - 11'b100_1100101: q = 4'b0010; - 11'b100_1100110: q = 4'b0010; - 11'b100_1100111: q = 4'b0010; - 11'b100_1101000: q = 4'b0010; - 11'b100_1101001: q = 4'b0010; - 11'b100_1101010: q = 4'b0010; - 11'b100_1101011: q = 4'b0010; - 11'b100_1101100: q = 4'b0010; - 11'b100_1101101: q = 4'b0010; - 11'b100_1101110: q = 4'b0010; - 11'b100_1101111: q = 4'b0010; - 11'b100_1110000: q = 4'b0010; - 11'b100_1110001: q = 4'b0010; - 11'b100_1110010: q = 4'b0010; - 11'b100_1110011: q = 4'b0010; - 11'b100_1110100: q = 4'b0000; - 11'b100_1110101: q = 4'b0000; - 11'b100_1110110: q = 4'b0000; - 11'b100_1110111: q = 4'b0000; - 11'b100_1111000: q = 4'b0000; - 11'b100_1111001: q = 4'b0000; - 11'b100_1111010: q = 4'b0000; - 11'b100_1111011: q = 4'b0000; - 11'b100_1111100: q = 4'b0000; - 11'b100_1111101: q = 4'b0000; - 11'b100_1111110: q = 4'b0000; - 11'b100_1111111: q = 4'b0000; - 11'b101_0000000: q = 4'b0000; - 11'b101_0000001: q = 4'b0000; - 11'b101_0000010: q = 4'b0000; - 11'b101_0000011: q = 4'b0000; - 11'b101_0000100: q = 4'b0000; - 11'b101_0000101: q = 4'b0000; - 11'b101_0000110: q = 4'b0000; - 11'b101_0000111: q = 4'b0000; - 11'b101_0001000: q = 4'b0000; - 11'b101_0001001: q = 4'b0000; - 11'b101_0001010: q = 4'b0000; - 11'b101_0001011: q = 4'b0000; - 11'b101_0001100: q = 4'b0100; - 11'b101_0001101: q = 4'b0100; - 11'b101_0001110: q = 4'b0100; - 11'b101_0001111: q = 4'b0100; - 11'b101_0010000: q = 4'b0100; - 11'b101_0010001: q = 4'b0100; - 11'b101_0010010: q = 4'b0100; - 11'b101_0010011: q = 4'b0100; - 11'b101_0010100: q = 4'b0100; - 11'b101_0010101: q = 4'b0100; - 11'b101_0010110: q = 4'b0100; - 11'b101_0010111: q = 4'b0100; - 11'b101_0011000: q = 4'b0100; - 11'b101_0011001: q = 4'b0100; - 11'b101_0011010: q = 4'b0100; - 11'b101_0011011: q = 4'b0100; - 11'b101_0011100: q = 4'b0100; - 11'b101_0011101: q = 4'b0100; - 11'b101_0011110: q = 4'b0100; - 11'b101_0011111: q = 4'b0100; - 11'b101_0100000: q = 4'b0100; - 11'b101_0100001: q = 4'b0100; - 11'b101_0100010: q = 4'b0100; - 11'b101_0100011: q = 4'b0100; - 11'b101_0100100: q = 4'b0100; - 11'b101_0100101: q = 4'b0100; - 11'b101_0100110: q = 4'b0100; - 11'b101_0100111: q = 4'b0100; - 11'b101_0101000: q = 4'b1000; - 11'b101_0101001: q = 4'b1000; - 11'b101_0101010: q = 4'b1000; - 11'b101_0101011: q = 4'b1000; - 11'b101_0101100: q = 4'b1000; - 11'b101_0101101: q = 4'b1000; - 11'b101_0101110: q = 4'b1000; - 11'b101_0101111: q = 4'b1000; - 11'b101_0110000: q = 4'b1000; - 11'b101_0110001: q = 4'b1000; - 11'b101_0110010: q = 4'b1000; - 11'b101_0110011: q = 4'b1000; - 11'b101_0110100: q = 4'b1000; - 11'b101_0110101: q = 4'b1000; - 11'b101_0110110: q = 4'b1000; - 11'b101_0110111: q = 4'b1000; - 11'b101_0111000: q = 4'b1000; - 11'b101_0111001: q = 4'b1000; - 11'b101_0111010: q = 4'b1000; - 11'b101_0111011: q = 4'b1000; - 11'b101_0111100: q = 4'b1000; - 11'b101_0111101: q = 4'b1000; - 11'b101_0111110: q = 4'b1000; - 11'b101_0111111: q = 4'b1000; - 11'b101_1000000: q = 4'b0001; - 11'b101_1000001: q = 4'b0001; - 11'b101_1000010: q = 4'b0001; - 11'b101_1000011: q = 4'b0001; - 11'b101_1000100: q = 4'b0001; - 11'b101_1000101: q = 4'b0001; - 11'b101_1000110: q = 4'b0001; - 11'b101_1000111: q = 4'b0001; - 11'b101_1001000: q = 4'b0001; - 11'b101_1001001: q = 4'b0001; - 11'b101_1001010: q = 4'b0001; - 11'b101_1001011: q = 4'b0001; - 11'b101_1001100: q = 4'b0001; - 11'b101_1001101: q = 4'b0001; - 11'b101_1001110: q = 4'b0001; - 11'b101_1001111: q = 4'b0001; - 11'b101_1010000: q = 4'b0001; - 11'b101_1010001: q = 4'b0001; - 11'b101_1010010: q = 4'b0001; - 11'b101_1010011: q = 4'b0001; - 11'b101_1010100: q = 4'b0001; - 11'b101_1010101: q = 4'b0001; - 11'b101_1010110: q = 4'b0001; - 11'b101_1010111: q = 4'b0001; - 11'b101_1011000: q = 4'b0010; - 11'b101_1011001: q = 4'b0010; - 11'b101_1011010: q = 4'b0010; - 11'b101_1011011: q = 4'b0010; - 11'b101_1011100: q = 4'b0010; - 11'b101_1011101: q = 4'b0010; - 11'b101_1011110: q = 4'b0010; - 11'b101_1011111: q = 4'b0010; - 11'b101_1100000: q = 4'b0010; - 11'b101_1100001: q = 4'b0010; - 11'b101_1100010: q = 4'b0010; - 11'b101_1100011: q = 4'b0010; - 11'b101_1100100: q = 4'b0010; - 11'b101_1100101: q = 4'b0010; - 11'b101_1100110: q = 4'b0010; - 11'b101_1100111: q = 4'b0010; - 11'b101_1101000: q = 4'b0010; - 11'b101_1101001: q = 4'b0010; - 11'b101_1101010: q = 4'b0010; - 11'b101_1101011: q = 4'b0010; - 11'b101_1101100: q = 4'b0010; - 11'b101_1101101: q = 4'b0010; - 11'b101_1101110: q = 4'b0010; - 11'b101_1101111: q = 4'b0010; - 11'b101_1110000: q = 4'b0000; - 11'b101_1110001: q = 4'b0000; - 11'b101_1110010: q = 4'b0000; - 11'b101_1110011: q = 4'b0000; - 11'b101_1110100: q = 4'b0000; - 11'b101_1110101: q = 4'b0000; - 11'b101_1110110: q = 4'b0000; - 11'b101_1110111: q = 4'b0000; - 11'b101_1111000: q = 4'b0000; - 11'b101_1111001: q = 4'b0000; - 11'b101_1111010: q = 4'b0000; - 11'b101_1111011: q = 4'b0000; - 11'b101_1111100: q = 4'b0000; - 11'b101_1111101: q = 4'b0000; - 11'b101_1111110: q = 4'b0000; - 11'b101_1111111: q = 4'b0000; - 11'b110_0000000: q = 4'b0000; - 11'b110_0000001: q = 4'b0000; - 11'b110_0000010: q = 4'b0000; - 11'b110_0000011: q = 4'b0000; - 11'b110_0000100: q = 4'b0000; - 11'b110_0000101: q = 4'b0000; - 11'b110_0000110: q = 4'b0000; - 11'b110_0000111: q = 4'b0000; - 11'b110_0001000: q = 4'b0000; - 11'b110_0001001: q = 4'b0000; - 11'b110_0001010: q = 4'b0000; - 11'b110_0001011: q = 4'b0000; - 11'b110_0001100: q = 4'b0000; - 11'b110_0001101: q = 4'b0000; - 11'b110_0001110: q = 4'b0000; - 11'b110_0001111: q = 4'b0000; - 11'b110_0010000: q = 4'b0100; - 11'b110_0010001: q = 4'b0100; - 11'b110_0010010: q = 4'b0100; - 11'b110_0010011: q = 4'b0100; - 11'b110_0010100: q = 4'b0100; - 11'b110_0010101: q = 4'b0100; - 11'b110_0010110: q = 4'b0100; - 11'b110_0010111: q = 4'b0100; - 11'b110_0011000: q = 4'b0100; - 11'b110_0011001: q = 4'b0100; - 11'b110_0011010: q = 4'b0100; - 11'b110_0011011: q = 4'b0100; - 11'b110_0011100: q = 4'b0100; - 11'b110_0011101: q = 4'b0100; - 11'b110_0011110: q = 4'b0100; - 11'b110_0011111: q = 4'b0100; - 11'b110_0100000: q = 4'b0100; - 11'b110_0100001: q = 4'b0100; - 11'b110_0100010: q = 4'b0100; - 11'b110_0100011: q = 4'b0100; - 11'b110_0100100: q = 4'b0100; - 11'b110_0100101: q = 4'b0100; - 11'b110_0100110: q = 4'b0100; - 11'b110_0100111: q = 4'b0100; - 11'b110_0101000: q = 4'b1000; - 11'b110_0101001: q = 4'b1000; - 11'b110_0101010: q = 4'b1000; - 11'b110_0101011: q = 4'b1000; - 11'b110_0101100: q = 4'b1000; - 11'b110_0101101: q = 4'b1000; - 11'b110_0101110: q = 4'b1000; - 11'b110_0101111: q = 4'b1000; - 11'b110_0110000: q = 4'b1000; - 11'b110_0110001: q = 4'b1000; - 11'b110_0110010: q = 4'b1000; - 11'b110_0110011: q = 4'b1000; - 11'b110_0110100: q = 4'b1000; - 11'b110_0110101: q = 4'b1000; - 11'b110_0110110: q = 4'b1000; - 11'b110_0110111: q = 4'b1000; - 11'b110_0111000: q = 4'b1000; - 11'b110_0111001: q = 4'b1000; - 11'b110_0111010: q = 4'b1000; - 11'b110_0111011: q = 4'b1000; - 11'b110_0111100: q = 4'b1000; - 11'b110_0111101: q = 4'b1000; - 11'b110_0111110: q = 4'b1000; - 11'b110_0111111: q = 4'b1000; - 11'b110_1000000: q = 4'b0001; - 11'b110_1000001: q = 4'b0001; - 11'b110_1000010: q = 4'b0001; - 11'b110_1000011: q = 4'b0001; - 11'b110_1000100: q = 4'b0001; - 11'b110_1000101: q = 4'b0001; - 11'b110_1000110: q = 4'b0001; - 11'b110_1000111: q = 4'b0001; - 11'b110_1001000: q = 4'b0001; - 11'b110_1001001: q = 4'b0001; - 11'b110_1001010: q = 4'b0001; - 11'b110_1001011: q = 4'b0001; - 11'b110_1001100: q = 4'b0001; - 11'b110_1001101: q = 4'b0001; - 11'b110_1001110: q = 4'b0001; - 11'b110_1001111: q = 4'b0001; - 11'b110_1010000: q = 4'b0001; - 11'b110_1010001: q = 4'b0001; - 11'b110_1010010: q = 4'b0001; - 11'b110_1010011: q = 4'b0001; - 11'b110_1010100: q = 4'b0010; - 11'b110_1010101: q = 4'b0010; - 11'b110_1010110: q = 4'b0010; - 11'b110_1010111: q = 4'b0010; - 11'b110_1011000: q = 4'b0010; - 11'b110_1011001: q = 4'b0010; - 11'b110_1011010: q = 4'b0010; - 11'b110_1011011: q = 4'b0010; - 11'b110_1011100: q = 4'b0010; - 11'b110_1011101: q = 4'b0010; - 11'b110_1011110: q = 4'b0010; - 11'b110_1011111: q = 4'b0010; - 11'b110_1100000: q = 4'b0010; - 11'b110_1100001: q = 4'b0010; - 11'b110_1100010: q = 4'b0010; - 11'b110_1100011: q = 4'b0010; - 11'b110_1100100: q = 4'b0010; - 11'b110_1100101: q = 4'b0010; - 11'b110_1100110: q = 4'b0010; - 11'b110_1100111: q = 4'b0010; - 11'b110_1101000: q = 4'b0010; - 11'b110_1101001: q = 4'b0010; - 11'b110_1101010: q = 4'b0010; - 11'b110_1101011: q = 4'b0010; - 11'b110_1101100: q = 4'b0010; - 11'b110_1101101: q = 4'b0010; - 11'b110_1101110: q = 4'b0010; - 11'b110_1101111: q = 4'b0010; - 11'b110_1110000: q = 4'b0000; - 11'b110_1110001: q = 4'b0000; - 11'b110_1110010: q = 4'b0000; - 11'b110_1110011: q = 4'b0000; - 11'b110_1110100: q = 4'b0000; - 11'b110_1110101: q = 4'b0000; - 11'b110_1110110: q = 4'b0000; - 11'b110_1110111: q = 4'b0000; - 11'b110_1111000: q = 4'b0000; - 11'b110_1111001: q = 4'b0000; - 11'b110_1111010: q = 4'b0000; - 11'b110_1111011: q = 4'b0000; - 11'b110_1111100: q = 4'b0000; - 11'b110_1111101: q = 4'b0000; - 11'b110_1111110: q = 4'b0000; - 11'b110_1111111: q = 4'b0000; - 11'b111_0000000: q = 4'b0000; - 11'b111_0000001: q = 4'b0000; - 11'b111_0000010: q = 4'b0000; - 11'b111_0000011: q = 4'b0000; - 11'b111_0000100: q = 4'b0000; - 11'b111_0000101: q = 4'b0000; - 11'b111_0000110: q = 4'b0000; - 11'b111_0000111: q = 4'b0000; - 11'b111_0001000: q = 4'b0000; - 11'b111_0001001: q = 4'b0000; - 11'b111_0001010: q = 4'b0000; - 11'b111_0001011: q = 4'b0000; - 11'b111_0001100: q = 4'b0000; - 11'b111_0001101: q = 4'b0000; - 11'b111_0001110: q = 4'b0000; - 11'b111_0001111: q = 4'b0000; - 11'b111_0010000: q = 4'b0100; - 11'b111_0010001: q = 4'b0100; - 11'b111_0010010: q = 4'b0100; - 11'b111_0010011: q = 4'b0100; - 11'b111_0010100: q = 4'b0100; - 11'b111_0010101: q = 4'b0100; - 11'b111_0010110: q = 4'b0100; - 11'b111_0010111: q = 4'b0100; - 11'b111_0011000: q = 4'b0100; - 11'b111_0011001: q = 4'b0100; - 11'b111_0011010: q = 4'b0100; - 11'b111_0011011: q = 4'b0100; - 11'b111_0011100: q = 4'b0100; - 11'b111_0011101: q = 4'b0100; - 11'b111_0011110: q = 4'b0100; - 11'b111_0011111: q = 4'b0100; - 11'b111_0100000: q = 4'b0100; - 11'b111_0100001: q = 4'b0100; - 11'b111_0100010: q = 4'b0100; - 11'b111_0100011: q = 4'b0100; - 11'b111_0100100: q = 4'b0100; - 11'b111_0100101: q = 4'b0100; - 11'b111_0100110: q = 4'b0100; - 11'b111_0100111: q = 4'b0100; - 11'b111_0101000: q = 4'b0100; - 11'b111_0101001: q = 4'b0100; - 11'b111_0101010: q = 4'b0100; - 11'b111_0101011: q = 4'b0100; - 11'b111_0101100: q = 4'b1000; - 11'b111_0101101: q = 4'b1000; - 11'b111_0101110: q = 4'b1000; - 11'b111_0101111: q = 4'b1000; - 11'b111_0110000: q = 4'b1000; - 11'b111_0110001: q = 4'b1000; - 11'b111_0110010: q = 4'b1000; - 11'b111_0110011: q = 4'b1000; - 11'b111_0110100: q = 4'b1000; - 11'b111_0110101: q = 4'b1000; - 11'b111_0110110: q = 4'b1000; - 11'b111_0110111: q = 4'b1000; - 11'b111_0111000: q = 4'b1000; - 11'b111_0111001: q = 4'b1000; - 11'b111_0111010: q = 4'b1000; - 11'b111_0111011: q = 4'b1000; - 11'b111_0111100: q = 4'b1000; - 11'b111_0111101: q = 4'b1000; - 11'b111_0111110: q = 4'b1000; - 11'b111_0111111: q = 4'b1000; - 11'b111_1000000: q = 4'b0001; - 11'b111_1000001: q = 4'b0001; - 11'b111_1000010: q = 4'b0001; - 11'b111_1000011: q = 4'b0001; - 11'b111_1000100: q = 4'b0001; - 11'b111_1000101: q = 4'b0001; - 11'b111_1000110: q = 4'b0001; - 11'b111_1000111: q = 4'b0001; - 11'b111_1001000: q = 4'b0001; - 11'b111_1001001: q = 4'b0001; - 11'b111_1001010: q = 4'b0001; - 11'b111_1001011: q = 4'b0001; - 11'b111_1001100: q = 4'b0001; - 11'b111_1001101: q = 4'b0001; - 11'b111_1001110: q = 4'b0001; - 11'b111_1001111: q = 4'b0001; - 11'b111_1010000: q = 4'b0001; - 11'b111_1010001: q = 4'b0001; - 11'b111_1010010: q = 4'b0010; - 11'b111_1010011: q = 4'b0010; - 11'b111_1010100: q = 4'b0010; - 11'b111_1010101: q = 4'b0010; - 11'b111_1010110: q = 4'b0010; - 11'b111_1010111: q = 4'b0010; - 11'b111_1011000: q = 4'b0010; - 11'b111_1011001: q = 4'b0010; - 11'b111_1011010: q = 4'b0010; - 11'b111_1011011: q = 4'b0010; - 11'b111_1011100: q = 4'b0010; - 11'b111_1011101: q = 4'b0010; - 11'b111_1011110: q = 4'b0010; - 11'b111_1011111: q = 4'b0010; - 11'b111_1100000: q = 4'b0010; - 11'b111_1100001: q = 4'b0010; - 11'b111_1100010: q = 4'b0010; - 11'b111_1100011: q = 4'b0010; - 11'b111_1100100: q = 4'b0010; - 11'b111_1100101: q = 4'b0010; - 11'b111_1100110: q = 4'b0010; - 11'b111_1100111: q = 4'b0010; - 11'b111_1101000: q = 4'b0010; - 11'b111_1101001: q = 4'b0010; - 11'b111_1101010: q = 4'b0010; - 11'b111_1101011: q = 4'b0010; - 11'b111_1101100: q = 4'b0010; - 11'b111_1101101: q = 4'b0010; - 11'b111_1101110: q = 4'b0010; - 11'b111_1101111: q = 4'b0010; - 11'b111_1110000: q = 4'b0000; - 11'b111_1110001: q = 4'b0000; - 11'b111_1110010: q = 4'b0000; - 11'b111_1110011: q = 4'b0000; - 11'b111_1110100: q = 4'b0000; - 11'b111_1110101: q = 4'b0000; - 11'b111_1110110: q = 4'b0000; - 11'b111_1110111: q = 4'b0000; - 11'b111_1111000: q = 4'b0000; - 11'b111_1111001: q = 4'b0000; - 11'b111_1111010: q = 4'b0000; - 11'b111_1111011: q = 4'b0000; - 11'b111_1111100: q = 4'b0000; - 11'b111_1111101: q = 4'b0000; - 11'b111_1111110: q = 4'b0000; - 11'b111_1111111: q = 4'b0000; - endcase diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 3e41c16c7..4cd23488b 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0) -`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN)) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 1)) module srt ( input logic clk, @@ -164,7 +164,7 @@ module srtpreproc ( assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // Number of cycles of divider - assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2); + assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN + 2); endmodule ///////////////////////////////// From b874c5c05da18ab092805aaea9273461a5aa9b75 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 15:01:38 -0700 Subject: [PATCH 023/138] removed minus 1 case in rounding --- pipelined/src/fpu/divsqrt.sv | 6 ++-- pipelined/src/fpu/fma.sv | 41 +++++++++++----------- pipelined/src/fpu/fmashiftcalc.sv | 7 ++-- pipelined/src/fpu/fpu.sv | 5 ++- pipelined/src/fpu/lzacorrection.sv | 3 +- pipelined/src/fpu/postprocess.sv | 7 ++-- pipelined/src/fpu/round.sv | 54 ++++++++++------------------- pipelined/src/fpu/srt-radix4.sv | 5 +-- pipelined/src/fpu/srtfsm.sv | 4 +-- pipelined/testbench/testbench-fp.sv | 8 ++--- 10 files changed, 60 insertions(+), 80 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 8420baa13..7e240420c 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -43,7 +43,6 @@ module divsqrt( input logic StallM, input logic StallE, output logic DivStickyM, - output logic DivNegStickyM, output logic DivBusy, output logic DivDone, output logic [`NE+1:0] DivCalcExpM, @@ -58,11 +57,12 @@ module divsqrt( logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; logic [`DURLEN-1:0] Dur; + logic NegSticky; srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 57b053da7..039876e9d 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -70,20 +70,21 @@ module fma( /////////////////////////////////////////////////////////////////////////////// // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - - align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, - .Am, .ZmSticky, .KillProd); - // calculate the signs and take the opperation into account sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, + .Ps, .As, .Am, .ZmSticky, .KillProd); + + + // /////////////////////////////////////////////////////////////////////////////// // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt); + loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -142,6 +143,7 @@ endmodule module align( + input logic As, Ps, input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero @@ -172,7 +174,7 @@ module align( // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; - assign KillProd = ACnt[`NE+1]|XZero|YZero; + assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); always_comb @@ -183,7 +185,7 @@ module align( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | if (KillProd) begin - ZmShifted = ZmPreshifted; + ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; ZmSticky = ~(XZero|YZero); // If the addend is too small to effect the addition @@ -221,6 +223,7 @@ module add( input logic [2*`NF+1:0] Pm, // the product's mantissa input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic KillProd, // should the product be set to 0 + input logic ZmSticky, input logic XZero, YZero, // is the input zero output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed @@ -243,13 +246,14 @@ module add( assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign PmKilled = Pm&{2*`NF+2{~KillProd}}; - - - // Do the addition // - calculate a positive and negitive sum in parallel - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)}; + // Zsticky Psticky + // PreSum -1 = don't add 1 +1 = add 2 + // NegPreSum +1 = add 2 -1 = don't add 1 + // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 + assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive assign NegSum = PreSum[3*`NF+6]; @@ -261,7 +265,7 @@ endmodule module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+1:0] P, // product + input logic [2*`NF+3:0] P, // product output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result ); @@ -273,12 +277,9 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; assign G[3*`NF+6:2*`NF+4] = 0; assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:2] = A[2*`NF+3:2]^P; - assign G[2*`NF+3:2] = A[2*`NF+3:2]&P; - assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P; - assign T[1:0] = A[1:0]; - assign G[1:0] = 0; - assign Z[1:0] = ~A[1:0]; + assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; + assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; + assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; // Apply function to determine Leading pattern diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 3c286b50f..ae974eb07 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,7 +35,6 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - input logic ZDenorm, output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection @@ -54,7 +53,7 @@ module fmashiftcalc( // calculate the sum's exponent // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 - assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -149,9 +148,9 @@ module fmashiftcalc( // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; - assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift; + assign FmaShiftAmt = FmaNCnt+DenormShift; endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index bd018253a..5428481d9 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -127,7 +127,6 @@ module fpu ( //divide signals logic [`QLEN-1:0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; - logic DivNegStickyE, DivNegStickyM; logic DivStickyE, DivStickyM; logic DivDoneM; logic [`DURLEN-1:0] EarlyTermShiftM; @@ -288,7 +287,7 @@ module fpu ( // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), - .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal + .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, @@ -384,7 +383,7 @@ module fpu ( postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index 17db0c0b9..eb9d35594 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -37,7 +37,6 @@ module lzacorrection( input logic [`NE+1:0] DivDenormShift, input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection - input logic FmaKillProd, // is the product set to zero input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction output logic [`NE+1:0] DivCorrExp, @@ -59,7 +58,7 @@ module lzacorrection( assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 30945532a..d7fcb2a00 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -56,7 +56,6 @@ module postprocess ( //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivSticky, - input logic DivNegSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, input logic [`QLEN-1:0] Quot, @@ -153,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, - .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb @@ -183,7 +182,7 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp, + lzacorrection lzacorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); @@ -203,7 +202,7 @@ module postprocess ( round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, - .DivSticky, .DivNegSticky, .DivDone, + .DivSticky, .DivDone, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index c73edc088..e2b9cb3e9 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -55,7 +55,6 @@ module round( input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE+1:0] DivCorrExp, // the calculated expoent input logic DivSticky, // sticky bit - input logic DivNegSticky, output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction @@ -67,7 +66,6 @@ module round( output logic R, UfLSBRes // bits needed to calculate rounding ); logic LSBRes; // bit used for rounding - least significant bit of the normalized sum - logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result logic NormSumSticky; // normalized sum's sticky bit logic UfSticky; // sticky bit for underlow calculation @@ -254,40 +252,25 @@ module round( assign S = UfSticky | UfRound; - // Deterimine if a small number was supposed to be subtrated - // - for FMA or if division has a negitive sticky bit - assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound); - assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky; - - always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even + 3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down - 3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up - 3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude + 3'b010: CalcPlus1 = Nsgn;//round down + 3'b011: CalcPlus1 = ~Nsgn;//round up + 3'b100: CalcPlus1 = R;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even + 3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up - 3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude + 3'b010: UfCalcPlus1 = Nsgn;//round down + 3'b011: UfCalcPlus1 = ~Nsgn;//round up + 3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase - // Determine if you subtract 1 - case (Frm) - 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero - 3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down - 3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up - 3'b100: CalcMinus1 = 0;//round to nearest max magnitude - default: CalcMinus1 = 1'bx; - endcase end @@ -295,26 +278,25 @@ module round( assign Plus1 = CalcPlus1 & (S | R); assign FpPlus1 = Plus1&~(ToInt&CvtOp); assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky - assign Minus1 = CalcMinus1 & (S | R); // Compute rounded result if (`FPSIZES == 1) begin - assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1}; + assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; end else if (`FPSIZES == 2) begin // \/FLEN+1 // | NE+2 | NF | // '-NE+2-^----NF1----^ // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} : - Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + assign RoundAdd = OutFmt ? {{{`FLEN{1'b0}}}, FpPlus1} : + {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; end else if (`FPSIZES == 3) begin always_comb begin case (OutFmt) - `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; - `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; + `FMT: RoundAdd = {{{`FLEN{1'b0}}}, FpPlus1}; + `FMT1: RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + `FMT2: RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; default: RoundAdd = (`FLEN+1)'(0); endcase end @@ -322,10 +304,10 @@ module round( end else if (`FPSIZES == 4) begin always_comb begin case (OutFmt) - 2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - 2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; - 2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; - 2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; + 2'h3: RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; + 2'h1: RoundAdd = {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; + 2'h0: RoundAdd = {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; + 2'h2: RoundAdd = {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; endcase end diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 5a7e96e2a..b1bf6f563 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -40,6 +40,7 @@ module srtradix4( input logic [`DIVLEN-1:0] X, input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1:0] Quot, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] FirstWS, FirstWC, @@ -106,9 +107,9 @@ module srtradix4( // if starting a new divison set Q to 0 and QM to -1 mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); - flop #(`QLEN) QMreg(clk, QMMux, QM[0]); + flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Quot = Q[0]; + assign Quot = NegSticky ? QM[0] : Q[0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 21e35c365..481b1b223 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -44,7 +44,7 @@ module srtfsm( output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, output logic DivDone, - output logic DivNegStickyE, + output logic NegSticky, output logic DivBusy ); @@ -62,7 +62,7 @@ module srtfsm( assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; - assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? + assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? assign EarlyTermShiftE = step; always_ff @(posedge clk) begin diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 2aec1ab15..033045e7f 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -681,7 +681,7 @@ module testbenchfp; postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), - .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky, + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, @@ -697,8 +697,8 @@ module testbenchfp; .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + .XInfE(XInf), .YInfE(YInf), .NegSticky(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .NegSticky(DivNegSticky), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; @@ -854,7 +854,7 @@ end // check if result is correct // - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); From 3dcddf845380f129f55f18881afe631f00c723af Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 15:28:22 -0700 Subject: [PATCH 024/138] some code cleanup --- pipelined/src/fpu/fma.sv | 11 ++++------- pipelined/src/fpu/postprocess.sv | 7 +++---- pipelined/src/fpu/round.sv | 31 ++++++------------------------- 3 files changed, 13 insertions(+), 36 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 039876e9d..44cd3616a 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -51,7 +51,6 @@ module fma( logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed - logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum /////////////////////////////////////////////////////////////////////////////// // Calculate the product // - When multipliying two fp numbers, add the exponents @@ -74,7 +73,7 @@ module fma( sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, - .Ps, .As, .Am, .ZmSticky, .KillProd); + .Am, .ZmSticky, .KillProd); @@ -82,7 +81,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -143,7 +142,6 @@ endmodule module align( - input logic As, Ps, input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero @@ -224,14 +222,13 @@ module add( input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic KillProd, // should the product be set to 0 input logic ZmSticky, - input logic XZero, YZero, // is the input zero output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend - output logic [3*`NF+5:0] Sm, // the positive sum - output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum + output logic [3*`NF+5:0] Sm // the positive sum ); + logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum /////////////////////////////////////////////////////////////////////////////// // Addition diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index d7fcb2a00..3060e51d6 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -84,7 +84,6 @@ module postprocess ( logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) logic R; // bits needed to determine rounding - logic [`FLEN:0] RoundAdd; // how much to add to the result logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result @@ -200,10 +199,10 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, .Xs, .Ys, .CvtCs, .Nsgn); - round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, - .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, + round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, + .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, .DivSticky, .DivDone, - .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index e2b9cb3e9..38bacce0d 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -48,8 +48,6 @@ module round( input logic CvtResUf, input logic [`CORRSHIFTSZ-1:0] Nfrac, input logic FmaZmSticky, // addend's sticky bit - input logic ZZero, // is Z zero - input logic FmaInvA, // invert Z input logic [`NE+1:0] FmaSe, // exponent of the normalized sum input logic Nsgn, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -62,11 +60,10 @@ module round( output logic S, // sticky bit output logic [`NE+1:0] Nexp, output logic Plus1, - output logic [`FLEN:0] RoundAdd, // how much to add to the result output logic R, UfLSBRes // bits needed to calculate rounding ); logic LSBRes; // bit used for rounding - least significant bit of the normalized sum - logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result + logic UfCalcPlus1; logic NormSumSticky; // normalized sum's sticky bit logic UfSticky; // sticky bit for underlow calculation logic [`NF-1:0] RoundFrac; @@ -74,6 +71,7 @@ module round( logic UfRound; logic FpRound, FpLSBRes, FpUfRound; logic CalcPlus1, FpPlus1; + logic [`FLEN:0] RoundAdd; // how much to add to the result /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -288,30 +286,13 @@ module round( // | NE+2 | NF | // '-NE+2-^----NF1----^ // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = OutFmt ? {{{`FLEN{1'b0}}}, FpPlus1} : - {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt}; end else if (`FPSIZES == 3) begin - always_comb begin - case (OutFmt) - `FMT: RoundAdd = {{{`FLEN{1'b0}}}, FpPlus1}; - `FMT1: RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; - `FMT2: RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; - default: RoundAdd = (`FLEN+1)'(0); - endcase - end + assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)}; - end else if (`FPSIZES == 4) begin - always_comb begin - case (OutFmt) - 2'h3: RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; - 2'h1: RoundAdd = {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; - 2'h0: RoundAdd = {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; - 2'h2: RoundAdd = {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; - endcase - end - - end + end else if (`FPSIZES == 4) + assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; // determine the result to be roundned assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; From 0b91e7526f7d6a8507d9f8bc7b52e094d98ecf21 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 22:42:39 +0000 Subject: [PATCH 025/138] DIVLEN and counter updated for sqrt computation and rounding --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/srt/srt-waves.do | 2 +- pipelined/srt/srt.sv | 35 +++++++++---------------- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index ad52be2e7..73810c68d 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -103,7 +103,7 @@ // division constants `define RADIX 32'h4 `define DIVCOPIES 32'h4 -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 1)) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN + 2) : (`NF + 2)) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do index 340c5b1f7..1e0c3f281 100644 --- a/pipelined/srt/srt-waves.do +++ b/pipelined/srt/srt-waves.do @@ -1,5 +1,5 @@ add wave -noupdate /testbench/* add wave -noupdate /testbench/srt/* -add wave -noupdate /testbench/srt/otfc2/* +add wave -noupdate /testbench/srt/sotfc2/* add wave -noupdate /testbench/srt/preproc/* add wave -noupdate /testbench/srt/divcounter/* diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 4cd23488b..74ce48cd0 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -29,8 +29,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" -`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0) -`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 1)) +`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF + 2) : 2) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 2 : (`NF - `XLEN + 2)) module srt ( input logic clk, @@ -49,7 +49,7 @@ module srt ( input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic rsign, done, - output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers + output logic [`DIVLEN-3:0] Rem, Quot, // *** later handle integers output logic [`NE-1:0] rExp, output logic [3:0] Flags ); @@ -164,7 +164,7 @@ module srtpreproc ( assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // Number of cycles of divider - assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN + 2); + assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN); endmodule ///////////////////////////////// @@ -226,26 +226,16 @@ endmodule /////////////////////////////////// // On-The-Fly Converter, Radix 2 // /////////////////////////////////// -module otfc2 #(parameter N=64) ( +module otfc2 #(parameter N=66) ( input logic clk, input logic Start, input logic qp, qz, qn, - output logic [N-1:0] r + output logic [N-3:0] r ); - // The on-the-fly converter transfers the quotient - // bits to the quotient as they come. - // - // This code follows the psuedocode presented in the - // floating point chapter of the book. Right now, - // it is written for Radix-2 division. - // - // QM is Q-1. It allows us to write negative bits - // without using a costly CPA. + // bits to the quotient as they come. + // Use this otfc for division only. logic [N+2:0] Q, QM, QNext, QMNext, QMMux; - // QR and QMR are the shifted versions of Q and QM. - // They are treated as [N-1:r] size signals, and - // discard the r most significant bits of Q and QM. logic [N+1:0] QR, QMR; flopr #(N+3) Qreg(clk, Start, QNext, Q); @@ -266,7 +256,7 @@ module otfc2 #(parameter N=64) ( QMNext = {QMR, 1'b0}; end end - assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; + assign r = Q[N] ? Q[N-1:2] : Q[N-2:1]; endmodule @@ -278,13 +268,12 @@ module sotfc2( input logic Start, input logic sp, sn, input logic [`DIVLEN+3:0] C, - output logic [`DIVLEN-1:0] Sq, + output logic [`DIVLEN-3:0] Sq, output logic [`DIVLEN+3:0] F ); - - // The on-the-fly converter transfers the square root // bits to the quotient as they come. + // Use this otfc for division and square root. logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); @@ -303,7 +292,7 @@ module sotfc2( SMNext = SM | ((C << 2) & ~(C << 1)); end end - assign Sq = S[`DIVLEN-1:0]; + assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:2] : S[`DIVLEN-2:1]; fsel2 fsel(sp, sn, C, S, SM, F); From 66bef379cb1e590655ff0316da09ce9caa4a5dff Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 23:44:54 +0000 Subject: [PATCH 026/138] renamed a file to fit diagram --- pipelined/src/fpu/postprocess.sv | 2 +- pipelined/src/fpu/{lzacorrection.sv => shiftcorrection.sv} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename pipelined/src/fpu/{lzacorrection.sv => shiftcorrection.sv} (99%) diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 3060e51d6..bc9c46a26 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -181,7 +181,7 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - lzacorrection lzacorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/shiftcorrection.sv similarity index 99% rename from pipelined/src/fpu/lzacorrection.sv rename to pipelined/src/fpu/shiftcorrection.sv index eb9d35594..f12cb831c 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -28,7 +28,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" -module lzacorrection( +module shiftcorrection( input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction input logic FmaOp, input logic DivOp, From ac2ad1d60a01830566c7e955c085ff6f01537342 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 14 Jul 2022 00:01:07 +0000 Subject: [PATCH 027/138] fixed uncommented line in makefile --- tests/riscof/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 621a5b54b..af67a5357 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) XLEN ?= 64 -all: root build_arch build_wally memfile +all: root build_arch #build_wally memfile root: mkdir -p $(work_dir) From 8f7ffc3f296134a5d6845a235d745ada886dd627 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 14 Jul 2022 00:39:30 +0000 Subject: [PATCH 028/138] S and SM are updating but are not correct yet --- pipelined/srt/srt.sv | 12 ++++++------ pipelined/srt/testbench.sv | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 74ce48cd0..b87fabfec 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -215,7 +215,7 @@ module fsel2 ( // Generate for both positive and negative bits assign FP = ~S & C; assign FN = SM | (C & (~C << 2)); - assign FZ = {(`DIVLEN+4){1'B0}}; + assign FZ = {(`DIVLEN+4){1'b0}}; // Choose which adder input will be used @@ -276,20 +276,20 @@ module sotfc2( // Use this otfc for division and square root. logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; - flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); + flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux); - flop #(`DIVLEN+4) SMreg(clk, SMux, S); + flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin if (sp) begin - SNext = S | ((C << 2) & ~(C << 1)); + SNext = S | ((C << 1) & ~(C << 2)); SMNext = S; end else if (sn) begin - SNext = SM | ((C << 2) & ~(C << 1)); + SNext = SM | ((C << 1) & ~(C << 2)); SMNext = SM; end else begin // If sp and sn are not true, then sz is SNext = S; - SMNext = SM | ((C << 2) & ~(C << 1)); + SMNext = SM | ((C << 1) & ~(C << 2)); end end assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:2] : S[`DIVLEN-2:1]; diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index b83e6b00e..02cd0bca3 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -72,7 +72,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b0; + assign Sqrt = 1'b1; // Divider srt srt(.clk, .Start(req), @@ -155,7 +155,7 @@ module testbench; req <= #5 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; - if (rExp !== correctr[62:52]) // check if accurate to 1 ulp + if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp begin errors = errors + 1; $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp); From 2fe8b6e34c57ecefd18ebdb2585f286641e6923c Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 14 Jul 2022 18:16:00 +0000 Subject: [PATCH 029/138] fixed error in divsqrt --- pipelined/config/shared/wally-shared.vh | 8 ++++---- pipelined/regression/sim-testfloat | 4 ++-- pipelined/regression/sim-testfloat-batch | 2 ++ pipelined/regression/wave-fpu.do | 18 +++++++++--------- pipelined/src/fpu/divshiftcalc.sv | 2 +- pipelined/src/fpu/divsqrt.sv | 4 ++-- pipelined/testbench/testbench-fp.sv | 9 ++++----- 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 73810c68d..5dc008bb5 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -97,14 +97,14 @@ `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) -`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) -`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) +`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+3) : (3*`NF+9)) +`define CORRSHIFTSZ ((`DIVRESLEN+`NF+3) > (3*`NF+8) ? (`DIVRESLEN+`NF+3) : (3*`NF+6)) // division constants `define RADIX 32'h4 `define DIVCOPIES 32'h4 -`define DIVLEN ((`NF < `XLEN) ? (`XLEN + 2) : (`NF + 2)) -`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) `define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) diff --git a/pipelined/regression/sim-testfloat b/pipelined/regression/sim-testfloat index 18f174a9e..25fe09a18 100755 --- a/pipelined/regression/sim-testfloat +++ b/pipelined/regression/sim-testfloat @@ -6,7 +6,7 @@ # fma - test fma # sub - test subtraction # div - test division -# sqrt - test square ro +# sqrt - test square root # all - test everything -vsim -do "do testfloat.do rv64fp mul" +vsim -do "do testfloat.do rv64fp $1" diff --git a/pipelined/regression/sim-testfloat-batch b/pipelined/regression/sim-testfloat-batch index f1178f1d2..c7f28a55e 100755 --- a/pipelined/regression/sim-testfloat-batch +++ b/pipelined/regression/sim-testfloat-batch @@ -1,7 +1,9 @@ + # cvtint - test integer conversion unit (fcvtint) # cvtfp - test floating-point conversion unit (fcvtfp) # cmp - test comparison unit's LT, LE, EQ opperations (fcmp) # add - test addition +# fma - test fma # sub - test subtraction # div - test division # sqrt - test square root diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 9a3d7e061..9caf75deb 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -9,23 +9,23 @@ add wave -noupdate /testbenchfp/Res add wave -noupdate /testbenchfp/Ans add wave -noupdate /testbenchfp/DivStart add wave -noupdate /testbenchfp/DivBusy -add wave -noupdate /testbenchfp/srtfsm/state +add wave -noupdate /testbenchfp/divsqrt/srtfsm/state add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* -add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* -add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* -add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/qsel4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/otfc4/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/expcalc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index a4f3feff6..3d31d863d 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -35,7 +35,7 @@ module divshiftcalc( // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; - // *** may be able to reduce shifter size + // *** QLEN can be changed to DIVLEN if we figure out what divLEN is - chenge normshiftsize definifion assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 7e240420c..91e07b080 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -59,10 +59,10 @@ module divsqrt( logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.XManE, .Dur, .YManE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 033045e7f..1493903e5 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -695,11 +695,10 @@ module testbenchfp; fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .NegSticky(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .NegSticky(DivNegSticky), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), - .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); + divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), + .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp), + .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone); assign CmpFlg[3:0] = 0; From cabd41a5a091d93735b3ed99c6971603d5f1cc4a Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 14 Jul 2022 19:38:27 +0000 Subject: [PATCH 030/138] Six tests passing and a bunch of sizizing issues fixed --- pipelined/config/shared/wally-shared.vh | 2 ++ pipelined/srt/sqrttestgen.c | 15 +++++++++--- pipelined/srt/srt.sv | 8 +++---- pipelined/srt/testbench.sv | 32 ++++++++++++------------- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 5dc008bb5..1237ef184 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -104,6 +104,8 @@ `define RADIX 32'h4 `define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index 710fc32f3..07f34c3c7 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -30,11 +30,11 @@ void main(void) FILE *fptr; double aFrac, rFrac; int aExp, rExp; - double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, + double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625, 1.75, 1.875, 1.99999, 1.1, 1.2, 1.01, 1.001, 1.0001, 2/1.1, 2/1.5, 2/1.25, 2/1.125}; - double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; int i; int bias = 1023; @@ -47,10 +47,19 @@ void main(void) for (i=0; i Date: Thu, 14 Jul 2022 21:19:45 +0000 Subject: [PATCH 031/138] Square root --- pipelined/srt/sqrttestgen.c | 4 ++-- pipelined/srt/srt.sv | 10 +++++----- pipelined/srt/testbench.sv | 3 +-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index 07f34c3c7..76c6a6649 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -32,9 +32,9 @@ void main(void) int aExp, rExp; double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625, 1.75, 1.875, 1.99999, - 1.1, 1.2, 1.01, 1.001, 1.0001, + 1.1, 1.5, 1.01, 1.001, 1.0001, 2/1.1, 2/1.5, 2/1.25, 2/1.125}; - double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, + double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16}; int i; int bias = 1023; diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 8e143efbb..5dcf7e96f 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -75,7 +75,7 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qn); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); @@ -169,11 +169,11 @@ endmodule // Quotient Selection, Radix 2 // ///////////////////////////////// module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN] ps, pc, + input logic [`DIVLEN+3:`DIVLEN-1] ps, pc, output logic qp, qz, qn ); - logic [`DIVLEN+3:`DIVLEN] p, g; + logic [`DIVLEN+3:`DIVLEN-1] p, g; logic magnitude, sign, cout; // The quotient selection logic is presented for simplicity, not @@ -184,8 +184,8 @@ module qsel2 ( // *** eventually just change to 4 bits assign p = ps ^ pc; assign g = ps & pc; - assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); + assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]); + assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (p[`DIVLEN] & g[`DIVLEN-1])))); assign #1 sign = p[`DIVLEN+3] ^ cout; /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & (ps[52]^pc[52])); diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index bbb6dee24..39696af44 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -160,10 +160,9 @@ module testbench; errors = errors + 1; $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp); $display("failed\n"); - $stop; end if (afrac === 52'hxxxxxxxxxxxxx) begin - $display("%d Tests completed successfully", testnum); + $display("%d Tests completed successfully", testnum-errors); $stop; end end end From 8c57eca2625b3df0f3670f7f0ca5e02388a319c9 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 14 Jul 2022 22:52:09 +0000 Subject: [PATCH 032/138] Square root radix 2 working, does not work with division --- pipelined/srt/srt.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 5dcf7e96f..949335bf0 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -75,7 +75,7 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], qp, qz, qn); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); @@ -170,6 +170,7 @@ endmodule ///////////////////////////////// module qsel2 ( // *** eventually just change to 4 bits input logic [`DIVLEN+3:`DIVLEN-1] ps, pc, + input logic Sqrt, output logic qp, qz, qn ); @@ -185,7 +186,7 @@ module qsel2 ( // *** eventually just change to 4 bits assign g = ps & pc; assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (p[`DIVLEN] & g[`DIVLEN-1])))); + assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1]))))); assign #1 sign = p[`DIVLEN+3] ^ cout; /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & (ps[52]^pc[52])); From 5cb9c9f319d4cc3e254de666b070279a097c0e2c Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 15 Jul 2022 20:16:59 +0000 Subject: [PATCH 033/138] merged floating-point radix-2 divider with radix-4 --- pipelined/config/shared/wally-shared.vh | 11 +- pipelined/regression/sim-wally | 2 +- pipelined/regression/wave-fpu.do | 18 +- pipelined/src/fpu/divshiftcalc.sv | 8 +- pipelined/src/fpu/divsqrt.sv | 11 +- pipelined/src/fpu/fpu.sv | 2 +- pipelined/src/fpu/postprocess.sv | 2 +- pipelined/src/fpu/shiftcorrection.sv | 6 +- pipelined/src/fpu/srt-radix4.sv | 359 ------------------------ pipelined/src/fpu/srtfsm.sv | 14 +- pipelined/src/fpu/srtpreproc.sv | 10 +- pipelined/testbench/testbench-fp.sv | 2 +- 12 files changed, 52 insertions(+), 393 deletions(-) delete mode 100644 pipelined/src/fpu/srt-radix4.sv diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 1237ef184..015ef2611 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -97,19 +97,20 @@ `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) -`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+3) : (3*`NF+9)) -`define CORRSHIFTSZ ((`DIVRESLEN+`NF+3) > (3*`NF+8) ? (`DIVRESLEN+`NF+3) : (3*`NF+6)) +`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9)) +`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h4 -`define DIVCOPIES 32'h4 +`define RADIX 32'h2 +`define DIVCOPIES 32'h1 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) -`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) +// one interation is required for the integer bit for minimally redundent radix-4 +`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally index 069851489..51c8b3edc 100755 --- a/pipelined/regression/sim-wally +++ b/pipelined/regression/sim-wally @@ -1,2 +1,2 @@ -vsim -do "do wally-pipelined.do rv32gc arch32i" +vsim -do "do wally-pipelined.do rv64gc arch64d" diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 9caf75deb..98c72f170 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -20,12 +20,20 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* -add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/qsel4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/otfc4/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/* +# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* +# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* -add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/expcalc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index 3d31d863d..af321b256 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,7 +1,7 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`QLEN-1:0] Quot, + input logic [`QLEN-1-(`RADIX/4):0] Quot, input logic [`FMTBITS-1:0] Fmt, input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`NE+1:0] DivCalcExp, @@ -30,12 +30,12 @@ module divshiftcalc( // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) // inital Left shift amount = NF + // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - // *** QLEN can be changed to DIVLEN if we figure out what divLEN is - chenge normshiftsize definifion - assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 91e07b080..cbf7f95f0 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -47,22 +47,23 @@ module divsqrt( output logic DivDone, output logic [`NE+1:0] DivCalcExpM, output logic [`DURLEN-1:0] EarlyTermShiftM, - output logic [`QLEN-1:0] QuotM + output logic [`QLEN-1-(`RADIX/4):0] QuotM // output logic [`XLEN-1:0] RemM, ); logic [`DIVLEN+3:0] NextWSN, NextWCN; logic [`DIVLEN+3:0] WS, WC; + logic [`DIVLEN+3:0] StickyWSA; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.XManE, .Dur, .YManE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, - .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); + .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 5428481d9..1bbd0aea5 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -125,7 +125,7 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`QLEN-1:0] QuotM; + logic [`QLEN-1-(`RADIX/4):0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic DivStickyE, DivStickyM; logic DivDoneM; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index bc9c46a26..e0eb50ac8 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -58,7 +58,7 @@ module postprocess ( input logic DivSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, - input logic [`QLEN-1:0] Quot, + input logic [`QLEN-1-(`RADIX/4):0] Quot, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index f12cb831c..ecfd9ba08 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -43,7 +43,7 @@ module shiftcorrection( output logic [`NE+1:0] FmaSe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [`CORRSHIFTSZ:0] CorrQuotShifted; + logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction @@ -53,9 +53,9 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0}; + assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv deleted file mode 100644 index b1bf6f563..000000000 --- a/pipelined/src/fpu/srt-radix4.sv +++ /dev/null @@ -1,359 +0,0 @@ -/////////////////////////////////////////// -// srt.sv -// -// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek -// Modified:13 January 2022 -// -// Purpose: Combined Divide and Square Root Floating Point and Integer Unit -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module srtradix4( - input logic clk, - input logic DivStart, - input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, - input logic [`NE-1:0] XExpE, YExpE, - input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, - output logic [`QLEN-1:0] Quot, - output logic [`DIVLEN+3:0] NextWSN, NextWCN, - output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, - output logic [`XLEN-1:0] Rem -); - - - /* verilator lint_off UNOPTFLAT */ - logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; - /* verilator lint_on UNOPTFLAT */ - logic [`DIVLEN+3:0] WSN, WCN; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] DivCalcExp; - logic [$clog2(`XLEN+1)-1:0] intExp; - logic intSign; - logic [`QLEN-1:0] QMMux; - - // Top Muxes and Registers - // When start is asserted, the inputs are loaded into the divider. - // Otherwise, the divisor is retained and the partial remainder - // is fed back for the next iteration. - // - when the start signal is asserted X and 0 are loaded into WS and WC - // - otherwise load WSA into the flipflop - // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) - // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; - assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; - mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); - flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); - mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); - flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); - - - // Divisor Selections - // - choose the negitive version of what's being selected - assign DBar = ~D; - assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; - assign D2 = {D[`DIVLEN+2:0], 1'b0}; - - genvar i; - generate - for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin - divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); - if(i<(`DIVCOPIES-1)) begin - assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; - assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; - assign Q[i+1] = QNext[i]; - assign QM[i+1] = QMNext[i]; - end - end - endgenerate - - // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); - flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); - flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - - assign Quot = NegSticky ? QM[0] : Q[0]; - assign FirstWS = WS[0]; - assign FirstWC = WC[0]; - - expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); - -endmodule - -//////////////// -// Submodules // -//////////////// - - /* verilator lint_off UNOPTFLAT */ -module divinteration ( - input logic clk, - input logic DivStart, - input logic DivBusy, - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] DBar, D2, DBar2, - input logic [`QLEN-1:0] Q, QM, - input logic [`DIVLEN+3:0] WS, WC, - output logic [`QLEN-1:0] QNext, QMNext, - output logic [`DIVLEN+3:0] WSA, WCA -); - /* verilator lint_on UNOPTFLAT */ - - logic [`DIVLEN+3:0] Dsel; - logic [3:0] q; - - // Quotient Selection logic - // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - qsel4 qsel4(.D, .WS, .WC, .q); - - always_comb - case (q) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = {`DIVLEN+4{1'b0}}; - 4'b0010: Dsel = D; - 4'b0001: Dsel = D2; - default: Dsel = {`DIVLEN+4{1'bx}}; - endcase - - // Partial Product Generation - // WSA, WCA = WS + WC - qD - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); - - otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext); - -endmodule - -module qsel4 ( - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] WS, WC, - output logic [3:0] q -); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] Dmsbs; - assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; - assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; - // D = 0001.xxx... - // Dmsbs = | | - // W = xxxx.xxx... - // Wmsbs = | | - - logic [3:0] QSel4[1023:0]; - - always_comb begin - integer d, w, i, w2; - for(d=0; d<8; d++) - for(w=0; w<128; w++)begin - i = d*128+w; - w2 = w-128*(w>=64); // convert to two's complement - case(d) - 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-4) QSel4[i] = 4'b0000; - else if(w2>=-13) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 1: if(w2>=14) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-15) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 2: if(w2>=15) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-16) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 3: if(w2>=16) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-18) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 4: if(w2>=18) QSel4[i] = 4'b1000; - else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 5: if(w2>=20) QSel4[i] = 4'b1000; - else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 6: if(w2>=20) QSel4[i] = 4'b1000; - else if(w2>=8) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-22) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 7: if(w2>=24) QSel4[i] = 4'b1000; - else if(w2>=8) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-24) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - endcase - end - end - assign q = QSel4[{Dmsbs,Wmsbs}]; - -endmodule - -/////////////////////////////////// -// On-The-Fly Converter, Radix 2 // -/////////////////////////////////// -module otfc4 ( - input logic clk, - input logic DivStart, - input logic DivBusy, - input logic [3:0] q, - input logic [`QLEN-1:0] Q, QM, - output logic [`QLEN-1:0] QNext, QMNext -); - - // The on-the-fly converter transfers the quotient - // bits to the quotient as they come. - // - // This code follows the psuedocode presented in the - // floating point chapter of the book. Right now, - // it is written for Radix-4 division. - // - // QM is Q-1. It allows us to write negative bits - // without using a costly CPA. - - // QR and QMR are the shifted versions of Q and QM. - // They are treated as [N-1:r] size signals, and - // discard the r most significant bits of Q and QM. - logic [`QLEN-3:0] QR, QMR; - - // shift Q (quotent) and QM (quotent-1) - // if q = 2 Q = {Q, 10} QM = {Q, 01} - // else if q = 1 Q = {Q, 01} QM = {Q, 00} - // else if q = 0 Q = {Q, 00} QM = {QM, 11} - // else if q = -1 Q = {QM, 11} QM = {QM, 10} - // else if q = -2 Q = {QM, 10} QM = {QM, 01} - // *** how does the 0 concatination numbers work? - - assign QR = Q[`QLEN-3:0]; - assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM - always_comb begin - if (q[3]) begin // +2 - QNext = {QR, 2'b10}; - QMNext = {QR, 2'b01}; - end else if (q[2]) begin // +1 - QNext = {QR, 2'b01}; - QMNext = {QR, 2'b00}; - end else if (q[1]) begin // -1 - QNext = {QMR, 2'b11}; - QMNext = {QMR, 2'b10}; - end else if (q[0]) begin // -2 - QNext = {QMR, 2'b10}; - QMNext = {QMR, 2'b01}; - end else begin // 0 - QNext = {QR, 2'b00}; - QMNext = {QMR, 2'b11}; - end - end - // Final Quoteint is in the range [.5, 2) - -endmodule - - - -///////// -// csa // -///////// -module csa #(parameter N=69) ( - input logic [N-1:0] in1, in2, in3, - input logic cin, - output logic [N-1:0] out1, out2 -); - - // This block adds in1, in2, in3, and cin to produce - // a result out1 / out2 in carry-save redundant form. - // cin is just added to the least significant bit and - // is Startuired to handle adding a negative divisor. - // Fortunately, the carry (out2) is shifted left by one - // bit, leaving room in the least significant bit to - // insert cin. - - assign out1 = in1 ^ in2 ^ in3; - assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | - (in2[N-2:0] & in3[N-2:0]), cin}; -endmodule - -module expcalc( - input logic [`FMTBITS-1:0] FmtE, - input logic [`NE-1:0] XExpE, YExpE, - input logic XZeroE, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] DivCalcExp - ); - logic [`NE-2:0] Bias; - - if (`FPSIZES == 1) begin - assign Bias = (`NE-1)'(`BIAS); - - end else if (`FPSIZES == 2) begin - assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - - end else if (`FPSIZES == 3) begin - always_comb - case (FmtE) - `FMT: Bias = (`NE-1)'(`BIAS); - `FMT1: Bias = (`NE-1)'(`BIAS1); - `FMT2: Bias = (`NE-1)'(`BIAS2); - default: Bias = 'x; - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (FmtE) - 2'h3: Bias = (`NE-1)'(`Q_BIAS); - 2'h1: Bias = (`NE-1)'(`D_BIAS); - 2'h0: Bias = (`NE-1)'(`S_BIAS); - 2'h2: Bias = (`NE-1)'(`H_BIAS); - endcase - end - // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; - endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 481b1b223..634ecc1d3 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -38,8 +38,9 @@ module srtfsm( input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, input logic DivStart, - input logic StallE, - input logic StallM, + input logic StallE, + input logic StallM, + input logic [`DIVLEN+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, @@ -59,7 +60,14 @@ module srtfsm( //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); - assign DivStickyE = |W; + // calculate sticky bit + // - there is a chance that a value is subtracted infinitly, resulting in an exact QM result + // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant + // radix-4 division can't create a QM that continually adds 0's + if (`RADIX == 2) + assign DivStickyE = |W&~(StickyWSA == WS); + else + assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index 7386332f8..b9fb8bb82 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module srtpreproc ( - input logic [`NF:0] XManE, YManE, + input logic [`NF:0] Xm, Ym, output logic [`DIVLEN-1:0] X, output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, @@ -49,16 +49,16 @@ module srtpreproc ( // ***can probably merge X LZC with conversion // cout the number of leading zeros - lzc #(`NF+1) lzcA (XManE, XZeroCnt); - lzc #(`NF+1) lzcB (YManE, YZeroCnt); + lzc #(`NF+1) lzcA (Xm, XZeroCnt); + lzc #(`NF+1) lzcB (Ym, YZeroCnt); // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // assign PreprocA = ExtraA << zeroCntA; // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XManE[`NF-1:0]< Date: Fri, 15 Jul 2022 21:42:45 +0000 Subject: [PATCH 034/138] forgot some files --- pipelined/src/fpu/otfc.sv | 112 +++++++++++++++++ pipelined/src/fpu/qsel.sv | 135 ++++++++++++++++++++ pipelined/src/fpu/srt.sv | 259 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 506 insertions(+) create mode 100644 pipelined/src/fpu/otfc.sv create mode 100644 pipelined/src/fpu/qsel.sv create mode 100644 pipelined/src/fpu/srt.sv diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv new file mode 100644 index 000000000..8d11273a2 --- /dev/null +++ b/pipelined/src/fpu/otfc.sv @@ -0,0 +1,112 @@ +/////////////////////////////////////////// +// otfc.sv +// +// Written: me@KatherineParry.com, cturek@hmc.edu +// Modified:7/14/2022 +// +// Purpose: On the fly conversion +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module otfc2 ( + input logic qp, qz, + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext +); + // The on-the-fly converter transfers the quotient + // bits to the quotient as they come. + // Use this otfc for division only. + logic [`QLEN-2:0] QR, QMR; + + assign QR = Q[`QLEN-2:0]; + assign QMR = QM[`QLEN-2:0]; // Shifted Q and QM + + always_comb begin + if (qp) begin + QNext = {QR, 1'b1}; + QMNext = {QR, 1'b0}; + end else if (qz) begin + QNext = {QR, 1'b0}; + QMNext = {QMR, 1'b1}; + end else begin // If qp and qz are not true, then qn is + QNext = {QMR, 1'b1}; + QMNext = {QMR, 1'b0}; + end + end + +endmodule + + +module otfc4 ( + input logic [3:0] q, + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext +); + + // The on-the-fly converter transfers the quotient + // bits to the quotient as they come. + // + // This code follows the psuedocode presented in the + // floating point chapter of the book. Right now, + // it is written for Radix-4 division. + // + // QM is Q-1. It allows us to write negative bits + // without using a costly CPA. + + // QR and QMR are the shifted versions of Q and QM. + // They are treated as [N-1:r] size signals, and + // discard the r most significant bits of Q and QM. + logic [`QLEN-3:0] QR, QMR; + + // shift Q (quotent) and QM (quotent-1) + // if q = 2 Q = {Q, 10} QM = {Q, 01} + // else if q = 1 Q = {Q, 01} QM = {Q, 00} + // else if q = 0 Q = {Q, 00} QM = {QM, 11} + // else if q = -1 Q = {QM, 11} QM = {QM, 10} + // else if q = -2 Q = {QM, 10} QM = {QM, 01} + + assign QR = Q[`QLEN-3:0]; + assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM + always_comb begin + if (q[3]) begin // +2 + QNext = {QR, 2'b10}; + QMNext = {QR, 2'b01}; + end else if (q[2]) begin // +1 + QNext = {QR, 2'b01}; + QMNext = {QR, 2'b00}; + end else if (q[1]) begin // -1 + QNext = {QMR, 2'b11}; + QMNext = {QMR, 2'b10}; + end else if (q[0]) begin // -2 + QNext = {QMR, 2'b10}; + QMNext = {QMR, 2'b01}; + end else begin // 0 + QNext = {QR, 2'b00}; + QMNext = {QMR, 2'b11}; + end + end + // Final Quoteint is in the range [.5, 2) + +endmodule diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv new file mode 100644 index 000000000..396ca7761 --- /dev/null +++ b/pipelined/src/fpu/qsel.sv @@ -0,0 +1,135 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module qsel2 ( // *** eventually just change to 4 bits + input logic [`DIVLEN+3:`DIVLEN] ps, pc, + output logic qp, qz//, qn +); + + logic [`DIVLEN+3:`DIVLEN] p, g; + logic magnitude, sign, cout; + + // The quotient selection logic is presented for simplicity, not + // for efficiency. You can probably optimize your logic to + // select the proper divisor with less delay. + + // Quotient equations from EE371 lecture notes 13-20 + assign p = ps ^ pc; + assign g = ps & pc; + + assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); + assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); + assign sign = p[`DIVLEN+3] ^ cout; +/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & + (ps[52]^pc[52])); + assign #1 sign = (ps[55]^pc[55])^ + (ps[54] & pc[54] | ((ps[54]^pc[54]) & + (ps[53]&pc[53] | ((ps[53]^pc[53]) & + (ps[52]&pc[52]))))); */ + + // Produce quotient = +1, 0, or -1 + assign qp = magnitude & ~sign; + assign qz = ~magnitude; +// assign #1 qn = magnitude & sign; +endmodule + +module qsel4 ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] WS, WC, + output logic [3:0] q +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] Dmsbs; + assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; + assign Wmsbs = PreWmsbs[7:1]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... + // Wmsbs = | | + + logic [3:0] QSel4[1023:0]; + + always_comb begin + integer d, w, i, w2; + for(d=0; d<8; d++) + for(w=0; w<128; w++)begin + i = d*128+w; + w2 = w-128*(w>=64); // convert to two's complement + case(d) + 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-13) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 1: if(w2>=14) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-15) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 2: if(w2>=15) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-16) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 3: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-18) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 4: if(w2>=18) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 5: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 6: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 7: if(w2>=24) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-24) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + endcase + end + end + assign q = QSel4[{Dmsbs,Wmsbs}]; + +endmodule diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv new file mode 100644 index 000000000..9e0315113 --- /dev/null +++ b/pipelined/src/fpu/srt.sv @@ -0,0 +1,259 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module srt( + input logic clk, + input logic DivStart, + input logic DivBusy, + input logic [`FMTBITS-1:0] FmtE, + input logic [`NE-1:0] Xe, Ye, + input logic XZeroE, YZeroE, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, + output logic [`QLEN-1-(`RADIX/4):0] Quot, + output logic [`DIVLEN+3:0] NextWSN, NextWCN, + output logic [`DIVLEN+3:0] StickyWSA, + output logic [`DIVLEN+3:0] FirstWS, FirstWC, + output logic [`NE+1:0] DivCalcExpM, + output logic [`XLEN-1:0] Rem +); + + + /* verilator lint_off UNOPTFLAT */ + logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] WSN, WCN; + logic [`DIVLEN+3:0] D, DBar, D2, DBar2; + logic [`NE+1:0] DivCalcExp; + logic [$clog2(`XLEN+1)-1:0] intExp; + logic intSign; + logic [`QLEN-1:0] QMMux; + + // Top Muxes and Registers + // When start is asserted, the inputs are loaded into the divider. + // Otherwise, the divisor is retained and the partial remainder + // is fed back for the next iteration. + // - when the start signal is asserted X and 0 are loaded into WS and WC + // - otherwise load WSA into the flipflop + // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) + // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized + if (`RADIX == 2) begin : nextw + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + end else begin + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + end + + mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); + flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); + mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); + flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); + flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); + flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); + + + // Divisor Selections + // - choose the negitive version of what's being selected + assign DBar = ~D; + if(`RADIX == 4) begin : d2 + assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; + assign D2 = {D[`DIVLEN+2:0], 1'b0}; + end + + genvar i; + generate + for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations + divinteration divinteration(.D, .DBar, .D2, .DBar2, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + if(i<(`DIVCOPIES-1)) begin + if (`RADIX==2)begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 1'b0}; + end else begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + end + assign Q[i+1] = QNext[i]; + assign QM[i+1] = QMNext[i]; + end + end + endgenerate + + // if starting a new divison set Q to 0 and QM to -1 + mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); + flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); + flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); + + assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + assign FirstWS = WS[0]; + assign FirstWC = WC[0]; + if(`RADIX==2) + if (`DIVCOPIES == 1) + assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; + else + assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; + + expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); + +endmodule + +//////////////// +// Submodules // +//////////////// + + /* verilator lint_off UNOPTFLAT */ +module divinteration ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] DBar, D2, DBar2, + input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] WS, WC, + output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] WSA, WCA +); + /* verilator lint_on UNOPTFLAT */ + + logic [`DIVLEN+3:0] Dsel; + logic [3:0] q; + logic qp, qz;//, qn; + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + // q encoding: + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 + if(`RADIX == 2) begin : qsel + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); + end else begin + qsel4 qsel4(.D, .WS, .WC, .q); + end + + if(`RADIX == 2) begin : dsel + assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D); + end else begin + always_comb + case (q) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = '0; + 4'b0010: Dsel = D; + 4'b0001: Dsel = D2; + default: Dsel = 'x; + endcase + end + // Partial Product Generation + // WSA, WCA = WS + WC - qD + if (`RADIX == 2) begin : csa + csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + end else begin + csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + end + + if (`RADIX == 2) begin : otfc + otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); + end else begin + otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); + end + +endmodule + + +///////// +// csa // +///////// +module csa #(parameter N=69) ( + input logic [N-1:0] in1, in2, in3, + input logic cin, + output logic [N-1:0] out1, out2 +); + + // This block adds in1, in2, in3, and cin to produce + // a result out1 / out2 in carry-save redundant form. + // cin is just added to the least significant bit and + // is Startuired to handle adding a negative divisor. + // Fortunately, the carry (out2) is shifted left by one + // bit, leaving room in the least significant bit to + // insert cin. + + assign out1 = in1 ^ in2 ^ in3; + assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | + (in2[N-2:0] & in3[N-2:0]), cin}; +endmodule + +module expcalc( + input logic [`FMTBITS-1:0] FmtE, + input logic [`NE-1:0] Xe, Ye, + input logic XZeroE, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + output logic [`NE+1:0] DivCalcExp + ); + logic [`NE-2:0] Bias; + + if (`FPSIZES == 1) begin + assign Bias = (`NE-1)'(`BIAS); + + end else if (`FPSIZES == 2) begin + assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + + end else if (`FPSIZES == 3) begin + always_comb + case (FmtE) + `FMT: Bias = (`NE-1)'(`BIAS); + `FMT1: Bias = (`NE-1)'(`BIAS1); + `FMT2: Bias = (`NE-1)'(`BIAS2); + default: Bias = 'x; + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (FmtE) + 2'h3: Bias = (`NE-1)'(`Q_BIAS); + 2'h1: Bias = (`NE-1)'(`D_BIAS); + 2'h0: Bias = (`NE-1)'(`S_BIAS); + 2'h2: Bias = (`NE-1)'(`H_BIAS); + endcase + end + // correct exponent for denormalized input's normalization shifts + assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + endmodule \ No newline at end of file From 622773343fa0c0976ff733fc7e9882b568843190 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 16 Jul 2022 17:43:31 -0700 Subject: [PATCH 035/138] restored intPending logic to be sticky for PLIC --- pipelined/src/uncore/plic_apb.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/src/uncore/plic_apb.sv b/pipelined/src/uncore/plic_apb.sv index f83033c4d..51e94d7f4 100644 --- a/pipelined/src/uncore/plic_apb.sv +++ b/pipelined/src/uncore/plic_apb.sv @@ -172,8 +172,8 @@ module plic_apb ( end // pending interrupt requests - //assign nextIntPending = (intPending | requests) & ~intInProgress; // - assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion + assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully. + //assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending); // context-dependent signals From 9514abf0e062dd636be89c00da91504bea753e3c Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 17 Jul 2022 01:39:57 +0000 Subject: [PATCH 036/138] Don't delete hdl directory at end of run --- synthDC/Makefile | 2 +- synthDC/extractSummary.py | 7 +++++++ synthDC/scripts/synth.tcl | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 369529e39..98b719428 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -107,7 +107,7 @@ ifeq ($(SAIFPOWER), 1) cp -f ../pipelined/regression/power.saif . endif dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out - rm -rf $(OUTPUTDIR)/hdl +# rm -rf $(OUTPUTDIR)/hdl rm -rf $(OUTPUTDIR)/WORK rm -rf $(OUTPUTDIR)/alib-52 diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 93363a069..29e1c8024 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -78,6 +78,13 @@ def freqPlot(tech, width, config): ''' plots delay, area for syntheses with specified tech, module, width ''' + current_directory = os.getcwd() + final_directory = os.path.join(current_directory, 'plots/wally') +# if not os.path.exists(final_directory): +# os.makedirs(final_directory) +# os.chdir(final_directory) + + freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('' == oneSynth.special): diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 251522dc8..9b72849f8 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -56,7 +56,7 @@ set vhdlout_show_unconnected_pins "true" # Due to parameterized Verilog must use analyze/elaborate and not # read_verilog/vhdl (change to pull in Verilog and/or VHDL) # -set alib_library_analysis_path ./$outputDir +#set alib_library_analysis_path ./$outputDir define_design_lib WORK -path ./$outputDir/WORK analyze -f sverilog -lib WORK $my_verilog_files elaborate $my_toplevel -lib WORK From bb3c455325905bc218d680afa40982686d53106c Mon Sep 17 00:00:00 2001 From: James Stine Date: Sun, 17 Jul 2022 11:06:30 -0500 Subject: [PATCH 037/138] Add import os in extractSummary.py --- synthDC/extractSummary.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 29e1c8024..d4f86fb3e 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -11,6 +11,7 @@ import numpy as np from ppa.ppaAnalyze import noOutliers from matplotlib import ticker import argparse +import os def synthsintocsv(): From 03f573351a5335866bd6dc79bd902aa5b16378a4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 17 Jul 2022 16:40:58 +0000 Subject: [PATCH 038/138] Rewrote convert shift calculation with always for ease of reading --- pipelined/src/fpu/cvtshiftcalc.sv | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv index 4d346d970..aef894f90 100644 --- a/pipelined/src/fpu/cvtshiftcalc.sv +++ b/pipelined/src/fpu/cvtshiftcalc.sv @@ -60,10 +60,11 @@ module cvtshiftcalc( // - otherwise: // | LzcInM | 0's if nessisary | // change to int shift to the left one - assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : - CvtResDenormUf ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} : - {CvtLzcIn, {`NF+1{1'b0}}}; - + + always_comb + if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}}; + else if (CvtResDenormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}}; + else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}}; // choose the negative of the fraction size if (`FPSIZES == 1) begin From 500191cf342645dc8094a2898bd7ec81d0c089ff Mon Sep 17 00:00:00 2001 From: James Stine Date: Sun, 17 Jul 2022 13:00:44 -0500 Subject: [PATCH 039/138] Add back extractSummary mkdir plots --- synthDC/extractSummary.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index d4f86fb3e..978365b16 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -60,6 +60,7 @@ def synthsintocsv(): writer.writerow([width, config, special, tech, freq, delay, area]) file.close() + def synthsfromcsv(filename): Synth = namedtuple("Synth", "width config special tech freq delay area") with open(filename, newline='') as csvfile: @@ -75,16 +76,15 @@ def synthsfromcsv(filename): allSynths[i] = Synth(*allSynths[i]) return allSynths + def freqPlot(tech, width, config): ''' plots delay, area for syntheses with specified tech, module, width ''' current_directory = os.getcwd() final_directory = os.path.join(current_directory, 'plots/wally') -# if not os.path.exists(final_directory): -# os.makedirs(final_directory) -# os.chdir(final_directory) - + if not os.path.exists(final_directory): + os.makedirs(final_directory) freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: @@ -159,6 +159,7 @@ def areaDelay(tech, delays, areas, labels, fig, ax, norm=False): return fig + def plotFeatures(tech, width, config): delays, areas, labels = ([] for i in range(3)) freq = techdict[tech].targfreq @@ -176,7 +177,8 @@ def plotFeatures(tech, width, config): titlestr = tech+'_'+width+config plt.title(titlestr) plt.savefig('./plots/wally/features_'+titlestr+'.png') - + + def plotConfigs(tech, special=''): delays, areas, labels = ([] for i in range(3)) freq = techdict[tech].targfreq @@ -215,7 +217,8 @@ def normAreaDelay(special=''): ax.set_ylabel('Area (add32)') ax.legend(handles = fullLeg, loc='upper left') plt.savefig('./plots/wally/normAreaDelay.png') - + + def addFO4axis(fig, ax, tech): fo4 = techdict[tech].fo4 From 8356e5d742de61eda656eaf06b86703243723c4b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 17 Jul 2022 16:20:04 -0500 Subject: [PATCH 040/138] Updated cache sram's to use 1 sram for all words in a way. Still needs to modified to support subdivision by max physical sram width. --- pipelined/src/cache/cacheway.sv | 24 +++++++++++++++++--- pipelined/testbench/testbench.sv | 38 +++++++++++++++++++------------- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 192fb9ace..6aedff816 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -55,9 +55,11 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, output logic VictimDirtyWay, output logic [TAGLEN-1:0] VictimTagWay); - localparam WORDSPERLINE = LINELEN/`XLEN; + localparam integer WORDSPERLINE = LINELEN/`XLEN; + localparam integer BYTESPERLINE = LINELEN/8; localparam LOGWPL = $clog2(WORDSPERLINE); localparam LOGXLENBYTES = $clog2(`XLEN/8); + localparam integer BYTESPERWORD = `XLEN/8; logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] DirtyBits; @@ -69,7 +71,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic SelTag; logic [$clog2(NUMLINES)-1:0] RAdrD; logic [2**LOGWPL-1:0] MemPAdrDecoded; - logic [LINELEN/`XLEN-1:0] SelectedWriteWordEn; + logic [WORDSPERLINE-1:0] SelectedWriteWordEn; logic [(`XLEN-1)/8:0] FinalByteMask; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -107,12 +109,28 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // *** instantiate one larger RAM, not one per RAM. Expand byte mask genvar words; - for(words = 0; words < LINELEN/`XLEN; words++) begin: word + logic [BYTESPERLINE-1:0] ReplicatedByteMask, SRAMLineByteMask, WordByteEnabled; + assign ReplicatedByteMask = {{WORDSPERLINE}{FinalByteMask}}; + for(words = 0; words < WORDSPERLINE; words++) + assign WordByteEnabled[BYTESPERWORD*(words+1)-1:BYTESPERWORD*(words)] = {{BYTESPERWORD}{SelectedWriteWordEn[words]}}; + assign SRAMLineByteMask = ReplicatedByteMask & WordByteEnabled; + + for(words = 0; words < 1; words++) begin: word + sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(LINELEN)) CacheDataMem(.clk, .Adr(RAdr), + .ReadData(ReadDataLine), + .CacheWriteData(CacheWriteData), + .WriteEnable(1'b1), .ByteMask(SRAMLineByteMask)); + end + + +/* -----\/----- EXCLUDED -----\/----- + for(words = 0; words < WORDSPERLINE; words++) begin: word sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr), .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), .CacheWriteData(CacheWriteData[(words+1)*`XLEN-1:words*`XLEN]), .WriteEnable(SelectedWriteWordEn[words]), .ByteMask(FinalByteMask)); end + -----/\----- EXCLUDED -----/\----- */ // AND portion of distributed read multiplexers mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelEvict}, SelData); diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 6d537b14a..5d74ed9ee 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -429,7 +429,9 @@ module DCacheFlushFSM localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; - localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; + localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN; + localparam integer cachenumwords = 1; //testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; + localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; localparam integer lognumlines = $clog2(numlines); localparam integer loglinebytelen = $clog2(linebytelen); localparam integer lognumways = $clog2(numways); @@ -438,16 +440,17 @@ module DCacheFlushFSM genvar index, way, cacheWord; - logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; + logic [linelen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; + logic [linelen-1:0] cacheline; + logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; + logic CacheValid [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; + logic CacheDirty [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; + logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; for(index = 0; index < numlines; index++) begin for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin + for(cacheWord = 0; cacheWord < cachenumwords; cacheWord++) begin copyShadow #(.tagstart(tagstart), - .loglinebytelen(loglinebytelen)) + .loglinebytelen(loglinebytelen), .linelen(linelen)) copyShadow(.clk, .start, .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), @@ -472,9 +475,14 @@ module DCacheFlushFSM #1 for(i = 0; i < numlines; i++) begin for(j = 0; j < numways; j++) begin - for(k = 0; k < numwords; k++) begin - if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin - ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; + if (CacheValid[j][i][0] & CacheDirty[j][i][0]) begin + for(k = 0; k < numwords; k++) begin + //cacheline = CacheData[j][i][0]; + // does not work with modelsim + // # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions. + // see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions + //ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k]; + ShadowRAM[(CacheAdr[j][i][0] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][0][`XLEN*k +: `XLEN]; end end end @@ -486,15 +494,15 @@ module DCacheFlushFSM endmodule module copyShadow - #(parameter tagstart, loglinebytelen) + #(parameter tagstart, loglinebytelen, linelen) (input logic clk, input logic start, input logic [`PA_BITS-1:tagstart] tag, input logic valid, dirty, - input logic [`XLEN-1:0] data, + input logic [linelen-1:0] data, input logic [32-1:0] index, input logic [32-1:0] cacheWord, - output logic [`XLEN-1:0] CacheData, + output logic [linelen-1:0] CacheData, output logic [`PA_BITS-1:0] CacheAdr, output logic [`XLEN-1:0] CacheTag, output logic CacheValid, @@ -533,4 +541,4 @@ task automatic updateProgramAddrLabelArray; end $fclose(ProgramLabelMapFP); $fclose(ProgramAddrMapFP); -endtask \ No newline at end of file +endtask From 0ef6137ab9ec2958624e0802766a114ec276c3ce Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 17 Jul 2022 21:05:31 -0500 Subject: [PATCH 041/138] Added degree of freedom to cache/sram. The sram width in bits is no longer defined by XLEN, but instead a separate parameter. This is decoupled from LINELEN, XLEN, and WORDLEN. --- pipelined/src/cache/cacheway.sv | 17 +++++++++- pipelined/testbench/testbench.sv | 57 +++++++++++++++++--------------- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 6aedff816..5397375a0 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -115,13 +115,28 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, assign WordByteEnabled[BYTESPERWORD*(words+1)-1:BYTESPERWORD*(words)] = {{BYTESPERWORD}{SelectedWriteWordEn[words]}}; assign SRAMLineByteMask = ReplicatedByteMask & WordByteEnabled; + localparam integer SRAMLEN = 256; + localparam integer NUMSRAM = LINELEN/SRAMLEN; + localparam integer SRAMLENINBYTES = SRAMLEN/8; + localparam integer LOGNUMSRAM = $clog2(NUMSRAM); + + for(words = 0; words < NUMSRAM; words++) begin: word + sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .Adr(RAdr), + .ReadData(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), + .CacheWriteData(CacheWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), + .WriteEnable(1'b1), .ByteMask(SRAMLineByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); + end + + +/* -----\/----- EXCLUDED -----\/----- for(words = 0; words < 1; words++) begin: word sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(LINELEN)) CacheDataMem(.clk, .Adr(RAdr), .ReadData(ReadDataLine), .CacheWriteData(CacheWriteData), .WriteEnable(1'b1), .ByteMask(SRAMLineByteMask)); end - + -----/\----- EXCLUDED -----/\----- */ + /* -----\/----- EXCLUDED -----\/----- for(words = 0; words < WORDSPERLINE; words++) begin: word diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 5d74ed9ee..0179d1d14 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -429,10 +429,13 @@ module DCacheFlushFSM localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; - localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN; - localparam integer cachenumwords = 1; //testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; - localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; - localparam integer lognumlines = $clog2(numlines); + localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN; + localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN; + localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM; + +//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM; + localparam integer numwords = sramlen/`XLEN; + localparam integer lognumlines = $clog2(numlines); localparam integer loglinebytelen = $clog2(linebytelen); localparam integer lognumways = $clog2(numways); localparam integer tagstart = lognumlines + loglinebytelen; @@ -440,17 +443,17 @@ module DCacheFlushFSM genvar index, way, cacheWord; - logic [linelen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; - logic [linelen-1:0] cacheline; - logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; - logic CacheValid [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; - logic CacheDirty [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; - logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachenumwords-1:0]; + logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [sramlen-1:0] cacheline; + logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; for(index = 0; index < numlines; index++) begin for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < cachenumwords; cacheWord++) begin + for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin copyShadow #(.tagstart(tagstart), - .loglinebytelen(loglinebytelen), .linelen(linelen)) + .loglinebytelen(loglinebytelen), .sramlen(sramlen)) copyShadow(.clk, .start, .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), @@ -468,23 +471,25 @@ module DCacheFlushFSM end end - integer i, j, k; + integer i, j, k, l; always @(posedge clk) begin if (start) begin #1 #1 for(i = 0; i < numlines; i++) begin for(j = 0; j < numways; j++) begin - if (CacheValid[j][i][0] & CacheDirty[j][i][0]) begin - for(k = 0; k < numwords; k++) begin - //cacheline = CacheData[j][i][0]; - // does not work with modelsim - // # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions. - // see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions - //ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k]; - ShadowRAM[(CacheAdr[j][i][0] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][0][`XLEN*k +: `XLEN]; + for(l = 0; l < cachesramwords; l++) begin + if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin + for(k = 0; k < numwords; k++) begin + //cacheline = CacheData[j][i][0]; + // does not work with modelsim + // # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions. + // see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions + //ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k]; + ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN]; + end + end end - end end end end @@ -494,15 +499,15 @@ module DCacheFlushFSM endmodule module copyShadow - #(parameter tagstart, loglinebytelen, linelen) + #(parameter tagstart, loglinebytelen, sramlen) (input logic clk, input logic start, input logic [`PA_BITS-1:tagstart] tag, input logic valid, dirty, - input logic [linelen-1:0] data, + input logic [sramlen-1:0] data, input logic [32-1:0] index, input logic [32-1:0] cacheWord, - output logic [linelen-1:0] CacheData, + output logic [sramlen-1:0] CacheData, output logic [`PA_BITS-1:0] CacheAdr, output logic [`XLEN-1:0] CacheTag, output logic CacheValid, @@ -515,7 +520,7 @@ module copyShadow CacheValid = valid; CacheDirty = dirty; CacheData = data; - CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); + CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8)); end end From 0210718f191e1ecf5c69246a9503b3b3ff06ae98 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 18 Jul 2022 17:31:17 +0000 Subject: [PATCH 042/138] renamed signals in ocde to match book --- pipelined/src/fpu/divshiftcalc.sv | 28 ++-- pipelined/src/fpu/fcvt.sv | 2 +- pipelined/src/fpu/flags.sv | 6 +- pipelined/src/fpu/fmashiftcalc.sv | 26 ++-- pipelined/src/fpu/fpu.sv | 6 +- pipelined/src/fpu/postprocess.sv | 46 +++---- pipelined/src/fpu/resultsign.sv | 15 +-- pipelined/src/fpu/round.sv | 186 +++++++++++++-------------- pipelined/src/fpu/roundsign.sv | 8 +- pipelined/src/fpu/shiftcorrection.sv | 16 +-- pipelined/testbench/testbench-fp.sv | 6 +- 11 files changed, 171 insertions(+), 174 deletions(-) diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index af321b256..3fbc9419d 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,10 +1,10 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`QLEN-1-(`RADIX/4):0] DivQm, input logic [`FMTBITS-1:0] Fmt, input logic [`DURLEN-1:0] DivEarlyTermShift, - input logic [`NE+1:0] DivCalcExp, + input logic [`NE+1:0] DivQe, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, output logic DivResDenorm, @@ -14,21 +14,21 @@ module divshiftcalc( // is the result denromalized // if the exponent is 1 then the result needs to be normalized then the result is denormalizes - assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]); + assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); // if the result is denormalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivCalcExp+NF+1-1 - assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp; + // Left shift amount = DivQe+NF+1-1 + assign DivDenormShift = (`NE+2)'(`NF)+DivQe; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivCalcExp+1 - // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivQe+1 + // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) // inital Left shift amount = NF // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (`NE+2)'(`NF); @@ -36,6 +36,6 @@ module divshiftcalc( // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 4820cf284..b9932523a 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -127,7 +127,7 @@ module fcvt ( // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}}; + (LeadingZeros); /////////////////////////////////////////////////////////////////////////// // exp calculations diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 4e16bc965..71f2a919d 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -48,10 +48,10 @@ module flags( input logic DivOp, // conversion opperation? input logic FmaOp, // Fma opperation? input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow - input logic [`NE+1:0] Nexp, // exponent of the normalized sum + input logic [`NE+1:0] Me, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs - input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding + input logic R, UfL, S, UfPlus1, // bits used to determine rounding output logic DivByZero, output logic IntInvalid, Invalid, Overflow, // flags used to select the res output logic [4:0] PostProcFlg // flags @@ -127,7 +127,7 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index ae974eb07..a6c1a1c60 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,7 +35,7 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results + output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count @@ -57,28 +57,28 @@ module fmashiftcalc( //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin - assign FmaConvNormSumExp = NormSumExp; + assign FmaNe = NormSumExp; end else if (`FPSIZES == 2) begin - assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; end else if (`FPSIZES == 3) begin always_comb begin case (Fmt) - `FMT: FmaConvNormSumExp = NormSumExp; - `FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - `FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; - default: FmaConvNormSumExp = {`NE+2{1'bx}}; + `FMT: FmaNe = NormSumExp; + `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; + default: FmaNe = {`NE+2{1'bx}}; endcase end end else if (`FPSIZES == 4) begin always_comb begin case (Fmt) - 2'h3: FmaConvNormSumExp = NormSumExp; - 2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; - 2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; - 2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; + 2'h3: FmaNe = NormSumExp; + 2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; + 2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; + 2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; endcase end @@ -144,11 +144,11 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero; + // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 1bbd0aea5..65be29972 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -381,10 +381,10 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), + .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), - .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), + .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index e0eb50ac8..f9ccd2553 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -48,17 +48,17 @@ module postprocess ( input logic FmaPs, // the product's sign input logic [`NE+1:0] FmaPe, // Product exponent input logic [3*`NF+5:0] FmaSm, // the positive sum - input logic FmaZmSticky, // sticky bit that is calculated during alignment + input logic FmaZmS, // sticky bit that is calculated during alignment input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, - input logic DivSticky, + input logic DivS, input logic DivDone, - input logic [`NE+1:0] DivCalcExp, - input logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`NE+1:0] DivQe, + input logic [`QLEN-1-(`RADIX/4):0] DivQm, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -77,9 +77,9 @@ module postprocess ( logic Ws; logic [`NF-1:0] Rf; // Result fraction logic [`NE-1:0] Re; // Result exponent - logic Nsgn; - logic [`NE+1:0] Nexp; - logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction + logic Ms; + logic [`NE+1:0] Me; + logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) @@ -89,19 +89,19 @@ module postprocess ( logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? logic IntInvalid, Overflow, Invalid; // flags - logic UfLSBRes; + logic UfL; logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaSe; // exponent of the normalized sum logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input - logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results + logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count // division singals logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] DivShiftIn; - logic [`NE+1:0] DivCorrExp; + logic [`NE+1:0] Qe; logic DivByZero; logic DivResDenorm; logic [`NE+1:0] DivDenormShift; @@ -150,9 +150,9 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -181,9 +181,9 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, - .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, - .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe, + .DivResDenorm, .DivDenormShift, .DivOp, .DivQe, + .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -197,19 +197,19 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, - .Xs, .Ys, .CvtCs, .Nsgn); + .Xs, .Ys, .CvtCs, .Ms); - round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, - .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, - .DivSticky, .DivDone, - .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp); + round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, + .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, + .DivS, .DivDone, + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, - .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws); + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); /////////////////////////////////////////////////////////////////////////////// // Flags @@ -218,8 +218,8 @@ module postprocess ( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, - .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); + .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, + .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// // Select the result diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index e6de0c181..e1ea5e410 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -39,28 +39,25 @@ module resultsign( input logic Mult, input logic R, input logic S, - input logic Nsgn, + input logic Ms, output logic Ws ); - logic ZeroSgn; - logic InfSgn; - logic Underflow; - // logic ResultSgnTmp; + logic Zeros; + logic Infs; // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity // if multiply then Psgn // otherwise psign - assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)); - assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive // if -p - z then the Sum is negitive - assign InfSgn = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn; + assign Infs = ZInf ? FmaAs : FmaPs; + assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 38bacce0d..6132dba4a 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -46,29 +46,29 @@ module round( input logic [1:0] PostProcSel, input logic CvtResDenormUf, input logic CvtResUf, - input logic [`CORRSHIFTSZ-1:0] Nfrac, - input logic FmaZmSticky, // addend's sticky bit + input logic [`CORRSHIFTSZ-1:0] Mf, + input logic FmaZmS, // addend's sticky bit input logic [`NE+1:0] FmaSe, // exponent of the normalized sum - input logic Nsgn, // the result's sign + input logic Ms, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent - input logic [`NE+1:0] DivCorrExp, // the calculated expoent - input logic DivSticky, // sticky bit + input logic [`NE+1:0] Qe, // the calculated expoent + input logic DivS, // sticky bit output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction output logic [`NE-1:0] Re, // Result exponent output logic S, // sticky bit - output logic [`NE+1:0] Nexp, + output logic [`NE+1:0] Me, output logic Plus1, - output logic R, UfLSBRes // bits needed to calculate rounding + output logic R, UfL // bits needed to calculate rounding ); - logic LSBRes; // bit used for rounding - least significant bit of the normalized sum + logic L; // bit used for rounding - least significant bit of the normalized sum logic UfCalcPlus1; - logic NormSumSticky; // normalized sum's sticky bit - logic UfSticky; // sticky bit for underlow calculation + logic NormS; // normalized sum's sticky bit + logic UfS; // sticky bit for underlow calculation logic [`NF-1:0] RoundFrac; logic FpRes, IntRes; - logic UfRound; + logic UfR; logic FpRound, FpLSBRes, FpUfRound; logic CalcPlus1, FpPlus1; logic [`FLEN:0] RoundAdd; // how much to add to the result @@ -114,61 +114,61 @@ module round( // | NF |1|1| // ^ ^ if floating point result // ^ if not an FMA result - if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN - if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 2) begin // XLEN is either 64 or 32 // so half and single are always smaller then XLEN // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 4) begin // Quad precision will always be greater than XLEN // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); // 3: NF > NF1 > XLEN // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); end @@ -176,37 +176,37 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp; + assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (`FPSIZES == 1) begin - assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; - assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; - assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; + assign FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end else if (`FPSIZES == 2) begin - assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1]; - assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1]; - assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2]; + assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; + assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; + assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; end else if (`FPSIZES == 3) begin always_comb case (OutFmt) `FMT: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end `FMT1: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF1-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF1]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2]; end `FMT2: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF2-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF2]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2]; end default: begin FpRound = 1'bx; @@ -218,55 +218,55 @@ module round( always_comb case (OutFmt) 2'h3: begin - FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; end 2'h1: begin - FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`D_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2]; end 2'h0: begin - FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`S_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2]; end 2'h2: begin - FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`H_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2]; end endcase end - assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound; - assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes; - assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; + assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound; + assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes; + assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; // used to determine underflow flag - assign UfLSBRes = FpRound; + assign UfL = FpRound; // determine sticky - assign S = UfSticky | UfRound; + assign S = UfS | UfR; always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even + 3'b000: CalcPlus1 = R & (S| L);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = Nsgn;//round down - 3'b011: CalcPlus1 = ~Nsgn;//round up + 3'b010: CalcPlus1 = Ms;//round down + 3'b011: CalcPlus1 = ~Ms;//round up 3'b100: CalcPlus1 = R;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even + 3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = Nsgn;//round down - 3'b011: UfCalcPlus1 = ~Nsgn;//round up - 3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude + 3'b010: UfCalcPlus1 = Ms;//round down + 3'b011: UfCalcPlus1 = ~Ms;//round up + 3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase @@ -275,7 +275,7 @@ module round( // If an answer is exact don't round assign Plus1 = CalcPlus1 & (S | R); assign FpPlus1 = Plus1&~(ToInt&CvtOp); - assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky + assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky // Compute rounded result if (`FPSIZES == 1) begin @@ -295,19 +295,19 @@ module round( assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; // determine the result to be roundned - assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; + assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; always_comb case(PostProcSel) - 2'b10: Nexp = FmaSe; // fma - 2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt - 2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide - default: Nexp = '0; + 2'b10: Me = FmaSe; // fma + 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt + 2'b01: Me = DivDone ? Qe : '0; // divide + default: Me = '0; endcase // round the result // - if the fraction overflows one should be added to the exponent - assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd; + assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd; assign Re = FullRe[`NE-1:0]; diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index 22686b242..55e322bc3 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -38,11 +38,11 @@ module roundsign( input logic DivOp, input logic CvtOp, input logic CvtCs, - output logic Nsgn + output logic Ms ); logic FmaResSgnTmp; - logic DivSgn; + logic Qs; // is the result negitive // if p - z is the Sum negitive @@ -52,9 +52,9 @@ module roundsign( // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs); - assign DivSgn = Xs^Ys; + assign Qs = Xs^Ys; // Sign for rounding calulation - assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp); + assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index ecfd9ba08..71a2393a6 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -33,13 +33,13 @@ module shiftcorrection( input logic FmaOp, input logic DivOp, input logic DivResDenorm, - input logic [`NE+1:0] DivCalcExp, + input logic [`NE+1:0] DivQe, input logic [`NE+1:0] DivDenormShift, - input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results + input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaSZero, - output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction - output logic [`NE+1:0] DivCorrExp, + output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction + output logic [`NE+1:0] Qe, output logic [`NE+1:0] FmaSe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction @@ -53,16 +53,16 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift - assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2}; + assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2}; endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index a95a66248..b90c3d3de 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -679,13 +679,13 @@ module testbenchfp; .Pe, .ZmSticky, .KillProd); postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), - .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), + .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, - .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone, + .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); From 7268b4b334785d77377fc60324abd1f5973230fa Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 18 Jul 2022 17:51:18 +0000 Subject: [PATCH 043/138] removed underflow from inexactct calculation --- pipelined/regression/sim-wally | 2 +- pipelined/src/fpu/flags.sv | 2 +- pipelined/testbench/testbench.sv | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally index 51c8b3edc..6163ab8b1 100755 --- a/pipelined/regression/sim-wally +++ b/pipelined/regression/sim-wally @@ -1,2 +1,2 @@ -vsim -do "do wally-pipelined.do rv64gc arch64d" +vsim -do "do wally-pipelined.do rv32gc wally32periph" diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 71f2a919d..6b1bc6381 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -131,7 +131,7 @@ module flags( // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero); + assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 6d537b14a..0fb5f5e60 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -114,7 +114,7 @@ logic [3:0] dummy; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - "wally32d": if (`D_SUPPORTED) tests = wally32d; + // "wally32d": if (`D_SUPPORTED) tests = wally32d; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; "imperas32c": if (`C_SUPPORTED) tests = imperas32c; From 6a77ada908c9e9f677eefb5eba1dcc9a7551b674 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Mon, 18 Jul 2022 12:13:15 -0700 Subject: [PATCH 044/138] added the sail change to spike to let it all run normally --- tests/riscof/spike/riscof_spike.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index 88a6269e8..fd4293954 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -108,7 +108,7 @@ class spike(pluginTemplate): #TODO: The following assumes you are using the riscv-gcc toolchain. If # not please change appropriately - self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ') + self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 ')) def runTests(self, testList): @@ -158,7 +158,12 @@ class spike(pluginTemplate): # echo statement. if self.target_run: # set up the simulation command. Template is for spike. Please change. - simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf) + if ('NO_SAIL=True' in testentry['macros']): + # if the tests can't run on SAIL we copy the reference output to the src directory + reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test)) + simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying + else: + simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf) else: simcmd = 'echo "NO RUN"' From 5a38a1522558b64b2034a69fa1aa3c1e1e94ec8d Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Mon, 18 Jul 2022 13:30:35 -0700 Subject: [PATCH 045/138] added additional changes to coremark to support rv32 --- benchmarks/coremark/riscv64-baremetal/core_portme.c | 11 ++++++++--- benchmarks/coremark/riscv64-baremetal/core_portme.h | 6 ++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.c b/benchmarks/coremark/riscv64-baremetal/core_portme.c index 57b7993ad..017bef74e 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.c +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c @@ -114,7 +114,12 @@ void portable_free(void *p) { #define read_csr(reg) ({ unsigned long __tmp; \ asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ __tmp; }) - #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8) + // #if (XLEN==64) + // typedef unsigned long long ee_ptr_int; + // #else + // typedef unsigned long ee_ptr_int; + // #endif + #define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8) #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) // Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms) #define TIMER_RES_DIVIDER 10000 @@ -196,8 +201,8 @@ void stop_time(void) { CORE_TICKS get_time(void) { CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); unsigned long instructions = minstretDiff(); - long long cm100 = 1000000000 / elapsed; // coremark score * 100 - long long cpi100 = elapsed*100/instructions; // CPI * 100 + ee_ptr_int cm100 = 1000000000 / elapsed; // coremark score * 100 + ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100 ee_printf(" WALLY CoreMark Results (from get_time)\n"); ee_printf(" Elapsed MTIME: %u\n", elapsed); ee_printf(" Elapsed MINSTRET: %lu\n", instructions); diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index 33768b0f1..3146d3ec0 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -69,14 +69,16 @@ typedef clock_t CORE_TICKS; // #elif (XLEN==32) // #include // typedef ee_u32 CORE_TICKS; -#else /* Configuration: size_t and clock_t Note these need to match the size of the clock output and the xLen the processor supports */ +#elif (XLEN==64) typedef unsigned long int size_t; typedef unsigned long int clock_t; -typedef clock_t CORE_TICKS; +#else +#include #endif +typedef clock_t CORE_TICKS; /* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION Initialize these strings per platform From cce5fb8dfdb9e05039af2a7a39014c5e7a1e0383 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 18 Jul 2022 20:48:56 +0000 Subject: [PATCH 046/138] moved Ss to execute stage --- pipelined/regression/sim-wally-batch | 2 +- pipelined/src/fpu/fma.sv | 9 ++++++- pipelined/src/fpu/fpu.sv | 36 +++++----------------------- pipelined/src/fpu/postprocess.sv | 3 ++- pipelined/src/fpu/roundsign.sv | 12 ++-------- pipelined/testbench/testbench-fp.sv | 5 ++-- 6 files changed, 22 insertions(+), 45 deletions(-) diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch index 7e821e584..8b5b5d628 100755 --- a/pipelined/regression/sim-wally-batch +++ b/pipelined/regression/sim-wally-batch @@ -1 +1 @@ -vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d" +vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph" diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 44cd3616a..3cd128301 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -44,6 +44,7 @@ module fma( output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign + output logic Ss, // the sum's sign output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count ); @@ -81,7 +82,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -226,6 +227,7 @@ module add( output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend + output logic Ss, output logic [3*`NF+5:0] Sm // the positive sum ); logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum @@ -257,6 +259,11 @@ module add( // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign Ss = NegSum^Ps; //*** move to execute stage endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 65be29972..e7888551c 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -114,6 +114,7 @@ module fpu ( logic NegSumE, NegSumM; logic ZSgnEffE, ZSgnEffM; logic PSgnE, PSgnM; + logic SsE, SsM; logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; // Cvt Signals @@ -255,36 +256,11 @@ module fpu ( .Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), + .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Sm(SumE), .Pe(ProdExpE), .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), .ZmSticky(AddendStickyE), .KillProd(KillProdE)); - // // fpdivsqrt using Goldschmidt's iteration - // if(`FLEN == 64) begin - // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // end - // else if (`FLEN == 32) begin - // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // end - // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), - // .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), - // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal @@ -359,9 +335,9 @@ module fpu ( {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); + flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE}, + {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -381,7 +357,7 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), + .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index f9ccd2553..7e741abbb 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -52,6 +52,7 @@ module postprocess ( input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z + input logic FmaSs, input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, @@ -197,7 +198,7 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, - .Xs, .Ys, .CvtCs, .Ms); + .FmaSs, .Xs, .Ys, .CvtCs, .Ms); round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index 55e322bc3..acecb5947 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -38,23 +38,15 @@ module roundsign( input logic DivOp, input logic CvtOp, input logic CvtCs, + input logic FmaSs, output logic Ms ); - logic FmaResSgnTmp; logic Qs; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage - - // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs); - assign Qs = Xs^Ys; // Sign for rounding calulation - assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); + assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index b90c3d3de..be5114e95 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -94,6 +94,7 @@ module testbenchfp; // in-between FMA signals logic Mult; + logic Ss; logic [`NE+1:0] Pe; logic ZmSticky; logic KillProd; @@ -674,13 +675,13 @@ module testbenchfp; fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), .Xe(XExp), .Ye(YExp), .Ze(ZExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), - .XZero, .YZero, .ZZero, + .XZero, .YZero, .ZZero, .Ss, .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, .Pe, .ZmSticky, .KillProd); postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), + .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), From 1e876733210f687163845bf6694a8b0fc59fe75c Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 18 Jul 2022 23:10:22 +0000 Subject: [PATCH 047/138] Cleaned up Coremark makefile --- benchmarks/coremark/Makefile | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 2db418aa3..d6cdc529e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -1,6 +1,8 @@ +# Wally Coremark Makefile +# Daniel Torres & David Harris 28 July 2022 + PORT_DIR = $(CURDIR)/riscv64-baremetal cmbase=../../addins/coremark -# cmbase= ../riscv-coremark/coremark work_dir= ../../benchmarks/coremark/work XLEN ?=64 sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ @@ -14,17 +16,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign- -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) - -# flags that cause build errors mcmodel=medlow - -# -static -mcmodel=medlow -mtune=sifive-7-series \ -# -O3 -falign-functions=16 -funroll-all-loops -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -# -finline-functions -falign-jumps=4 \ -# -nostdlib -nostartfiles -ffreestanding -mstrict-align \ -# -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \ -# -DPERFORMANCE_RUN=1 -# "-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) -DITERATIONS=10 all: $(work_dir)/coremark.bare.riscv.elf.memfile @@ -38,10 +30,8 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv extractFunctionRadix.sh $<.elf.objdump $(work_dir)/coremark.bare.riscv: $(sources) Makefile - # These flags were used by WD on CoreMark make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)" - # -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error - mkdir -p $(work_dir) + mkdir -p $(work_dir) mv $(cmbase)/coremark.bare.riscv $(work_dir) .PHONY: clean @@ -49,15 +39,4 @@ $(work_dir)/coremark.bare.riscv: $(sources) Makefile clean: rm -f $(work_dir)/* - - -# # PORT_CFLAGS = -g -march=$(XLEN)im -mabi=$(ABI) -static -mcmodel=medlow -mtune=sifive-3-series \ -# # -O3 -falign-functions=16 -funroll-all-loops \ -# # -finline-functions -falign-jumps=4 \ -# # -nostdlib -nostartfiles -ffreestanding -mstrict-align \ -# # -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \ -# # -DPERFORMANCE_RUN=1 - -# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " -# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta" -# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " \ No newline at end of file + \ No newline at end of file From 630110e73ed0cb72e303eb3a9eedad62aa5f1d8d Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 18 Jul 2022 16:48:13 -0700 Subject: [PATCH 048/138] Coremark cleanup --- benchmarks/coremark/Makefile | 6 +++--- benchmarks/coremark/riscv64-baremetal/core_portme.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index d6cdc529e..3f94a7b8b 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -16,7 +16,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign- -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) -DITERATIONS=10 + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) all: $(work_dir)/coremark.bare.riscv.elf.memfile @@ -31,7 +31,7 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv $(work_dir)/coremark.bare.riscv: $(sources) Makefile make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)" - mkdir -p $(work_dir) + mkdir -p $(work_dir) mv $(cmbase)/coremark.bare.riscv $(work_dir) .PHONY: clean @@ -39,4 +39,4 @@ $(work_dir)/coremark.bare.riscv: $(sources) Makefile clean: rm -f $(work_dir)/* - \ No newline at end of file + diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index 3146d3ec0..ce88f7239 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -94,7 +94,7 @@ typedef clock_t CORE_TICKS; #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ #endif #ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" + #define MEM_LOCATION "Code and Data in external RAM" #define MEM_LOCATION_UNSPEC 1 #endif From 64b3e4117b3aa0b7d20e45ef713214c86eea0385 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 19 Jul 2022 00:04:24 +0000 Subject: [PATCH 049/138] reworked fmashiftcalc to match book --- pipelined/src/fpu/fmashiftcalc.sv | 69 +++++++++++++++++-------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index a6c1a1c60..6d1c40bc7 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -43,6 +43,7 @@ module fmashiftcalc( ); logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias + logic [`NE+1:0] BiasCorr; /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -50,37 +51,40 @@ module fmashiftcalc( //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); - + logic [`NE+1:0] FmaSe; + assign FmaSe = FmaKillProd ? {2'b0, Ze} : FmaPe; // calculate the sum's exponent - // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 - assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign NormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin assign FmaNe = NormSumExp; end else if (`FPSIZES == 2) begin - assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - + assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS); + assign FmaNe = NormSumExp+BiasCorr; + end else if (`FPSIZES == 3) begin always_comb begin case (Fmt) - `FMT: FmaNe = NormSumExp; - `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; - default: FmaNe = {`NE+2{1'bx}}; + `FMT: BiasCorr = '0; + `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS); + `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS); + default: BiasCorr = 'x; endcase end + assign FmaNe = NormSumExp+BiasCorr; end else if (`FPSIZES == 4) begin always_comb begin case (Fmt) - 2'h3: FmaNe = NormSumExp; - 2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; - 2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; - 2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; + 2'h3: BiasCorr = '0; + 2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS); + 2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS); + 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS); endcase end + assign FmaNe = NormSumExp+BiasCorr; end @@ -89,25 +93,25 @@ module fmashiftcalc( if (`FPSIZES == 1) begin logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; + assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); + assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|NormSumExp; always_comb begin case (Fmt) `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; @@ -120,20 +124,20 @@ module fmashiftcalc( end else if (`FPSIZES == 4) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp; - assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)); - assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); + assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|NormSumExp; + assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); + assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|NormSumExp; + assign Sum3LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); + assign Sum3GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|NormSumExp; always_comb begin case (Fmt) 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; - endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking + endcase end end @@ -152,5 +156,8 @@ module fmashiftcalc( // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; - assign FmaShiftAmt = FmaNCnt+DenormShift; + if (`FPSIZES == 1) + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1; + else + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1; endmodule From 514674417ed91fb5b0c94c1750d156b3b1d626b6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 19 Jul 2022 01:10:10 +0000 Subject: [PATCH 050/138] moved Se into execute stage --- pipelined/src/fpu/fma.sv | 7 +++- pipelined/src/fpu/fmashiftcalc.sv | 61 ++++++++++++++-------------- pipelined/src/fpu/fpu.sv | 11 ++--- pipelined/src/fpu/postprocess.sv | 15 +++---- pipelined/src/fpu/resultsign.sv | 4 +- pipelined/src/fpu/round.sv | 6 +-- pipelined/src/fpu/shiftcorrection.sv | 6 +-- pipelined/testbench/testbench-fp.sv | 5 ++- 8 files changed, 61 insertions(+), 54 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 3cd128301..3f4cc2ac5 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -45,6 +45,7 @@ module fma( output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign output logic Ss, // the sum's sign + output logic [`NE+1:0] Se, output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count ); @@ -82,7 +83,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss); + add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -223,11 +224,14 @@ module add( input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic KillProd, // should the product be set to 0 input logic ZmSticky, + input logic [`NE-1:0] Ze, + input logic [`NE+1:0] Pe, output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend output logic Ss, + output logic [`NE+1:0] Se, output logic [3*`NF+5:0] Sm // the positive sum ); logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum @@ -264,6 +268,7 @@ module add( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign Ss = NegSum^Ps; //*** move to execute stage + assign Se = KillProd ? {2'b0, Ze} : Pe; endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 6d1c40bc7..79953b212 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,14 +35,15 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results + input logic [`NE+1:0] FmaSe, + output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero ); logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later - logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias + logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias logic [`NE+1:0] BiasCorr; /////////////////////////////////////////////////////////////////////////////// @@ -51,18 +52,16 @@ module fmashiftcalc( //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); - logic [`NE+1:0] FmaSe; - assign FmaSe = FmaKillProd ? {2'b0, Ze} : FmaPe; // calculate the sum's exponent - assign NormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin - assign FmaNe = NormSumExp; + assign NormSumExp = PreNormSumExp; end else if (`FPSIZES == 2) begin assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS); - assign FmaNe = NormSumExp+BiasCorr; + assign NormSumExp = PreNormSumExp+BiasCorr; end else if (`FPSIZES == 3) begin always_comb begin @@ -73,7 +72,7 @@ module fmashiftcalc( default: BiasCorr = 'x; endcase end - assign FmaNe = NormSumExp+BiasCorr; + assign NormSumExp = PreNormSumExp+BiasCorr; end else if (`FPSIZES == 4) begin always_comb begin @@ -84,7 +83,7 @@ module fmashiftcalc( 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS); endcase end - assign FmaNe = NormSumExp+BiasCorr; + assign NormSumExp = PreNormSumExp+BiasCorr; end @@ -92,26 +91,26 @@ module fmashiftcalc( if (`FPSIZES == 1) begin logic Sum0LEZ, Sum0GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|NormSumExp; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp; always_comb begin case (Fmt) `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; @@ -123,14 +122,14 @@ module fmashiftcalc( end else if (`FPSIZES == 4) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|NormSumExp; - assign Sum3LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); - assign Sum3GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|NormSumExp; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp; + assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); + assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp; always_comb begin case (Fmt) 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; @@ -148,11 +147,11 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero; + // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index e7888551c..a9c0ac247 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -109,6 +109,7 @@ module fpu ( logic [3*`NF+5:0] SumE, SumM; logic [`NE+1:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; + logic [`NE+1:0] SeE,SeM; logic KillProdE, KillProdM; logic InvAE, InvAM; logic NegSumE, NegSumM; @@ -256,7 +257,7 @@ module fpu ( .Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), + .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE), .Sm(SumE), .Pe(ProdExpE), .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), .ZmSticky(AddendStickyE), .KillProd(KillProdE)); @@ -335,9 +336,9 @@ module fpu ( {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM}); + flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE}, + {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -359,7 +360,7 @@ module fpu ( postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 7e741abbb..de3c4f30c 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -46,6 +46,7 @@ module postprocess ( //fma signals input logic FmaAs, // the modified Z sign - depends on instruction input logic FmaPs, // the product's sign + input logic [`NE+1:0] FmaSe, input logic [`NE+1:0] FmaPe, // Product exponent input logic [3*`NF+5:0] FmaSm, // the positive sum input logic FmaZmS, // sticky bit that is calculated during alignment @@ -93,10 +94,10 @@ module postprocess ( logic UfL; logic [`FMTBITS-1:0] OutFmt; // fma signals - logic [`NE+1:0] FmaSe; // exponent of the normalized sum + logic [`NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input - logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results + logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count // division singals @@ -151,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); @@ -182,9 +183,9 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe, + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .NormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivQe, - .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf); + .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -201,7 +202,7 @@ module postprocess ( .FmaSs, .Xs, .Ys, .CvtCs, .Ms); round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, - .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, + .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, .DivS, .DivDone, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me); @@ -209,7 +210,7 @@ module postprocess ( // Sign calculation /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, + resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S, .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); /////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index e1ea5e410..b8019b98b 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -34,7 +34,7 @@ module resultsign( input logic ZInf, input logic InfIn, input logic FmaOp, - input logic [`NE+1:0] FmaSe, + input logic [`NE+1:0] FmaMe, input logic FmaSZero, input logic Mult, input logic R, @@ -50,7 +50,7 @@ module resultsign( // if cancelation then 0 unless round to -infinity // if multiply then Psgn // otherwise psign - assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 6132dba4a..0943413bd 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -48,7 +48,7 @@ module round( input logic CvtResUf, input logic [`CORRSHIFTSZ-1:0] Mf, input logic FmaZmS, // addend's sticky bit - input logic [`NE+1:0] FmaSe, // exponent of the normalized sum + input logic [`NE+1:0] FmaMe, // exponent of the normalized sum input logic Ms, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE+1:0] Qe, // the calculated expoent @@ -176,7 +176,7 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp; + assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag @@ -299,7 +299,7 @@ module round( always_comb case(PostProcSel) - 2'b10: Me = FmaSe; // fma + 2'b10: Me = FmaMe; // fma 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt 2'b01: Me = DivDone ? Qe : '0; // divide default: Me = '0; diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 71a2393a6..50cffb078 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -35,12 +35,12 @@ module shiftcorrection( input logic DivResDenorm, input logic [`NE+1:0] DivQe, input logic [`NE+1:0] DivDenormShift, - input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results + input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction output logic [`NE+1:0] Qe, - output logic [`NE+1:0] FmaSe // exponent of the normalized sum + output logic [`NE+1:0] FmaMe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; @@ -58,7 +58,7 @@ module shiftcorrection( assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index be5114e95..91ce82616 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -96,6 +96,7 @@ module testbenchfp; logic Mult; logic Ss; logic [`NE+1:0] Pe; + logic [`NE+1:0] Se; logic ZmSticky; logic KillProd; logic [$clog2(3*`NF+7)-1:0] NCnt; @@ -675,7 +676,7 @@ module testbenchfp; fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), .Xe(XExp), .Ye(YExp), .Ze(ZExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), - .XZero, .YZero, .ZZero, .Ss, + .XZero, .YZero, .ZZero, .Ss, .Se, .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, .Pe, .ZmSticky, .KillProd); @@ -686,7 +687,7 @@ module testbenchfp; .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, - .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, + .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se), .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); From b13b49658bb6906378910ab7ca95beebee54d8b1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 19 Jul 2022 02:58:07 +0000 Subject: [PATCH 051/138] Removed duplicate -march from CoreMark makefile --- benchmarks/coremark/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 3f94a7b8b..1b619d80e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -10,8 +10,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)im -PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ +ARCH := rv$(XLEN)i +PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \ -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ From a79e5e11f66a1544c40e448dee9f6f00f11ab2dc Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 18 Jul 2022 23:37:18 -0500 Subject: [PATCH 052/138] Merged together the cache speed updates with the cache sram changes. The fstore2 changes still need to be added. --- pipelined/regression/wave.do | 522 ++++++++++++++++---------------- pipelined/src/cache/cache.sv | 36 ++- pipelined/src/cache/cachefsm.sv | 106 ++++--- pipelined/src/cache/cacheway.sv | 46 +-- 4 files changed, 364 insertions(+), 346 deletions(-) diff --git a/pipelined/regression/wave.do b/pipelined/regression/wave.do index b8d2d4484..ed3ba8ade 100644 --- a/pipelined/regression/wave.do +++ b/pipelined/regression/wave.do @@ -37,11 +37,11 @@ add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/Sta add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW -add wave -noupdate -group {instruction pipeline} /testbench/InstrFName -add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF -add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD -add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrE -add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrM +add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName +add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF +add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/InstrD +add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/InstrE +add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/InstrM add wave -noupdate -group {Decode Stage} /testbench/dut/core/ifu/PCD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ifu/InstrD add wave -noupdate -group {Decode Stage} /testbench/InstrDName @@ -55,10 +55,10 @@ add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE add wave -noupdate -group {Execution Stage} /testbench/InstrEName add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE add wave -noupdate -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName -add wave -noupdate -group {Memory Stage} /testbench/dut/core/PCM -add wave -noupdate -group {Memory Stage} /testbench/dut/core/InstrM -add wave -noupdate -group {Memory Stage} /testbench/InstrMName -add wave -noupdate -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM +add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM add wave -noupdate -group {WriteBack stage} /testbench/PCW add wave -noupdate -group {WriteBack stage} /testbench/InstrW add wave -noupdate -group {WriteBack stage} /testbench/InstrWName @@ -125,12 +125,12 @@ add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/if add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredClassNonCFIWrongE add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE -add wave -noupdate -group PCS /testbench/dut/core/ifu/PCNextF -add wave -noupdate -group PCS /testbench/dut/core/PCF -add wave -noupdate -group PCS /testbench/dut/core/ifu/PCD -add wave -noupdate -group PCS /testbench/dut/core/PCE -add wave -noupdate -group PCS /testbench/dut/core/PCM -add wave -noupdate -group PCS /testbench/PCW +add wave -noupdate -expand -group PCS /testbench/dut/core/ifu/PCNextF +add wave -noupdate -expand -group PCS /testbench/dut/core/PCF +add wave -noupdate -expand -group PCS /testbench/dut/core/ifu/PCD +add wave -noupdate -expand -group PCS /testbench/dut/core/PCE +add wave -noupdate -expand -group PCS /testbench/dut/core/PCM +add wave -noupdate -expand -group PCS /testbench/PCW add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F @@ -190,195 +190,177 @@ add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED -add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState -add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW -add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall -add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr -add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA -add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM -add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM -add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE -add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF -add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA +add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr +add wave -noupdate -expand -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/BusStall +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA +add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM +add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE +add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF +add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intClaim add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intEn add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intInProgress @@ -392,41 +374,12 @@ add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/pl add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/max_priority_with_irqs add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/irqs_at_max_priority add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/threshMask -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HCLK -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESETn -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HSELUART -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HADDR -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HWRITE -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HWDATA -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HREADUART -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESPUART -add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HREADYUART add wave -noupdate -group uart -expand -group Registers -expand /testbench/dut/uncore/uart/uart/u/LSR add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MCR add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MSR @@ -468,30 +421,81 @@ add wave -noupdate -group {debug trace} -expand -group wb /testbench/PCW add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PCNext2F add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PrivilegedNextPCM add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PrivilegedChangePCM -add wave -noupdate -group ifu -color Gold /testbench/dut/core/ifu/bus/busdp/busfsm/BusCurrState -add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead -add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr -add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck -add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA -add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete -add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF -add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState -add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0 -add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SelSpillF -add wave -noupdate -group ifu -expand -group icache -color Gold /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CurrState -add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/ITLBMissF -add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SelAdr -add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/PCNextF -add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/PCPF -add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/icache/HitWay -add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/ICacheStallF -add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/FinalInstrRawF -add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr -add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck -add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData -add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite -add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF -add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress +add wave -noupdate -expand -group ifu -color Gold /testbench/dut/core/ifu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusRead +add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusAdr +add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusAck +add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusHRDATA +add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUTransComplete +add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF +add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState +add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0 +add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SelSpillF +add wave -noupdate -expand -group ifu -expand -group icache -color Gold /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CurrState +add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/ITLBMissF +add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SelAdr +add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/PCNextF +add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/PCPF +add wave -noupdate -expand -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/icache/HitWay +add wave -noupdate -expand -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/ICacheStallF +add wave -noupdate -expand -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/FinalInstrRawF +add wave -noupdate -expand -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr +add wave -noupdate -expand -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck +add wave -noupdate -expand -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData +add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay +add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetDirtyWay +add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetValidWay +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite +add wave -noupdate -expand -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF +add wave -noupdate -expand -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress add wave -noupdate -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]} add wave -noupdate -group {Performance Counters} -label MINSTRET -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]} add wave -noupdate -group {Performance Counters} -label {LOAD STORE HAZARD} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]} @@ -508,9 +512,11 @@ add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {D add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/NextPTE add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/UpdatePTE +add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/FinalByteMask} +add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/SelectedWriteWordEn} TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {0 ns} 0} -quietly wave cursor active 1 +WaveRestoreCursors {{Cursor 5} {307575 ns} 1} {{Cursor 2} {307965 ns} 1} {{Cursor 3} {307661 ns} 0} +quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 314 configure wave -justifyvalue left @@ -525,4 +531,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {0 ns} {208 ns} +WaveRestoreZoom {0 ns} {3158332 ns} diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index ca6a5c9cf..62b74a8b6 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -105,9 +105,15 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic [NUMWAYS-1:0] SelectedWay; logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay; logic [1:0] CacheRW, CacheAtomic; - logic [LINELEN-1:0] ReadDataLine; + logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic save, restore; + logic SelBusBuffer; + + localparam LOGXLENBYTES = $clog2(`XLEN/8); + logic [2**LOGWPL-1:0] MemPAdrDecoded; + logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, LineByteMux; + genvar index; ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path @@ -121,7 +127,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) - CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FStore2, + CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .FStore2, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Invalidate(InvalidateCacheM)); @@ -134,7 +140,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // ReadDataLineWay is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. - or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLine)); + or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache)); or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); // Because of the sram clocked read when the ieu is stalled the read data maybe lost. @@ -153,6 +159,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER .y(WordOffsetAddr)); else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; + mux2 #(LINELEN) EarlyReturnBuf(ReadDataLineCache, CacheBusWriteData, SelBusBuffer, ReadDataLine); + subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL, LOGWPL) subcachelineread( .clk, .reset, .PAdr(WordOffsetAddr), .save, .restore, .ReadDataLine, .ReadDataWord); @@ -160,8 +168,24 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); + logic [LINELEN-1:0] FinalWriteDataDup; + assign FinalWriteDataDup = {WORDSPERLINE{FinalWriteData}}; + + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + for(index = 0; index < 2**LOGWPL; index++) begin + assign DemuxedByteMask[(index+1)*(`XLEN/8)-1:index*(`XLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; + end + // *** have to add back in fstore2 + assign LineByteMux = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. + assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes. + + for(index = 0; index < LINELEN/8; index++) begin + mux2 #(8) WriteDataMux(.d0(FinalWriteDataDup[8*index+7:8*index]), + .d1(CacheBusWriteData[8*index+7:8*index]), .s(LineByteMux[index]), .y(CacheWriteData[8*index+7:8*index])); + end + //mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), +// .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}), @@ -204,7 +228,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelEvict, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, - .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, + .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelBusBuffer, .save, .restore, .LRUWriteEn); endmodule diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 857884c43..3aa736121 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -32,50 +32,51 @@ module cachefsm (input logic clk, - input logic reset, + input logic reset, // inputs from IEU input logic [1:0] CacheRW, input logic [1:0] CacheAtomic, - input logic FlushCache, + input logic FlushCache, // hazard inputs - input logic CPUBusy, + input logic CPUBusy, // interlock fsm - input logic IgnoreRequestTLB, - input logic IgnoreRequestTrapM, + input logic IgnoreRequestTLB, + input logic IgnoreRequestTrapM, input logic TrapM, // Bus inputs - input logic CacheBusAck, + input logic CacheBusAck, // dcache internals - input logic CacheHit, - input logic VictimDirty, - input logic FlushAdrFlag, - input logic FlushWayFlag, + input logic CacheHit, + input logic VictimDirty, + input logic FlushAdrFlag, + input logic FlushWayFlag, // hazard outputs - output logic CacheStall, + output logic CacheStall, // counter outputs - output logic CacheMiss, - output logic CacheAccess, + output logic CacheMiss, + output logic CacheAccess, // Bus outputs - output logic CacheCommitted, - output logic CacheWriteLine, - output logic CacheFetchLine, + output logic CacheCommitted, + output logic CacheWriteLine, + output logic CacheFetchLine, // dcache internals - output logic SelAdr, - output logic ClearValid, - output logic ClearDirty, - output logic SetDirty, - output logic SetValid, - output logic SelEvict, - output logic LRUWriteEn, - output logic SelFlush, - output logic FlushAdrCntEn, - output logic FlushWayCntEn, - output logic FlushAdrCntRst, - output logic FlushWayCntRst, - output logic save, - output logic restore); + output logic SelAdr, + output logic ClearValid, + output logic ClearDirty, + output logic SetDirty, + output logic SetValid, + output logic SelEvict, + output logic LRUWriteEn, + output logic SelFlush, + output logic FlushAdrCntEn, + output logic FlushWayCntEn, + output logic FlushAdrCntRst, + output logic FlushWayCntRst, + output logic SelBusBuffer, + output logic save, + output logic restore); logic resetDelay; logic AMO; @@ -87,8 +88,9 @@ module cachefsm typedef enum logic [3:0] {STATE_READY, // hit states // miss states STATE_MISS_FETCH_WDV, - STATE_MISS_FETCH_DONE, + STATE_MISS_EVICT_DIRTY_START, STATE_MISS_EVICT_DIRTY, + STATE_MISS_EVICT_DIRTY_DONE, STATE_MISS_WRITE_CACHE_LINE, STATE_MISS_READ_WORD, STATE_MISS_READ_WORD_DELAY, @@ -139,21 +141,22 @@ module cachefsm STATE_READY: if(IgnoreRequest) NextState = STATE_READY; else if(DoFlush) NextState = STATE_FLUSH; else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY; - else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // change + else if(DoAnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; // change + else if(DoAnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; // change else NextState = STATE_READY; - STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_FETCH_DONE; + STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_FETCH_WDV; - STATE_MISS_FETCH_DONE: if(VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; - else NextState = STATE_MISS_WRITE_CACHE_LINE; - STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_WORD; - STATE_MISS_READ_WORD: if(CacheRW[0] & ~AMO) NextState = STATE_MISS_WRITE_WORD; + STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; + STATE_MISS_READ_WORD: if(CacheRW[0] & ~AMO) NextState = STATE_MISS_WRITE_WORD; else NextState = STATE_MISS_READ_WORD_DELAY; STATE_MISS_READ_WORD_DELAY: if(CPUBusy) NextState = STATE_CPU_BUSY; else NextState = STATE_READY; STATE_MISS_WRITE_WORD: if(CPUBusy) NextState = STATE_CPU_BUSY; else NextState = STATE_READY; - STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; + STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // start needed for the delayed lru update. + STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_EVICT_DIRTY_DONE; else NextState = STATE_MISS_EVICT_DIRTY; + STATE_MISS_EVICT_DIRTY_DONE: NextState = STATE_MISS_FETCH_WDV; STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY; else NextState = STATE_READY; STATE_FLUSH: NextState = STATE_FLUSH_CHECK; @@ -175,9 +178,10 @@ module cachefsm assign CacheCommitted = CurrState != STATE_READY; assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | - (CurrState == STATE_MISS_FETCH_DONE) | + (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_EVICT_DIRTY) | - (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_MISS_EVICT_DIRTY_DONE) | + (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. (CurrState == STATE_MISS_READ_WORD) | (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) | @@ -188,15 +192,16 @@ module cachefsm assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) | (CurrState == STATE_MISS_READ_WORD_DELAY & AMO) | - (CurrState == STATE_MISS_WRITE_WORD); + (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0])); assign ClearValid = '0; - assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | (CurrState == STATE_FLUSH_CLEAR_DIRTY); assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | - (CurrState == STATE_MISS_READ_WORD_DELAY) | - (CurrState == STATE_MISS_WRITE_WORD); + (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls - assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); + assign SelEvict = (CurrState == STATE_READY & DoAnyMiss & VictimDirty) | + (CurrState == STATE_MISS_EVICT_DIRTY_START) | + (CurrState == STATE_MISS_EVICT_DIRTY); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_CLEAR_DIRTY); @@ -208,8 +213,9 @@ module cachefsm assign FlushAdrCntRst = (CurrState == STATE_READY); assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls - assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss); - assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) | + assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss & ~VictimDirty) | + (CurrState == STATE_MISS_EVICT_DIRTY_DONE); + assign CacheWriteLine = (CurrState == STATE_READY & DoAnyMiss & VictimDirty) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); // handle cpu stall. assign restore = ((CurrState == STATE_CPU_BUSY)) & ~`REPLAY; @@ -222,10 +228,10 @@ module cachefsm // use the raw requests as we don't want IgnoreRequestTrapM in the critical path (CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed (CurrState == STATE_READY & (CacheRW[1] & CacheHit) & (CPUBusy & `REPLAY)) | - + (CurrState == STATE_READY & (DoAnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | - (CurrState == STATE_MISS_FETCH_DONE) | (CurrState == STATE_MISS_EVICT_DIRTY) | + (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | (CurrState == STATE_MISS_READ_WORD) | (CurrState == STATE_MISS_READ_WORD_DELAY & (AMO | (CPUBusy & `REPLAY))) | @@ -233,5 +239,7 @@ module cachefsm (CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) | resetDelay; + + assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE; endmodule // cachefsm diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 5397375a0..9734d8acc 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -48,7 +48,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic VictimWay, input logic FlushWay, input logic Invalidate, - input logic [(`XLEN-1)/8:0] ByteMask, +// input logic [(`XLEN-1)/8:0] ByteMask, + input logic [LINELEN/8-1:0] LineByteMask, output logic [LINELEN-1:0] ReadDataLineWay, output logic HitWay, @@ -71,12 +72,15 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic SelTag; logic [$clog2(NUMLINES)-1:0] RAdrD; logic [2**LOGWPL-1:0] MemPAdrDecoded; - logic [WORDSPERLINE-1:0] SelectedWriteWordEn; - logic [(`XLEN-1)/8:0] FinalByteMask; + logic SelectedWriteWordEn; +// logic [WORDSPERLINE-1:0] SelectedWriteWordEn; +// logic [(`XLEN-1)/8:0] FinalByteMask; + logic [LINELEN/8-1:0] FinalByteMask; ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// +/* -----\/----- EXCLUDED -----\/----- if(`LLEN>`XLEN)begin logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; onehotdecoder #(LOGWPL) adrdec( @@ -85,9 +89,10 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, end else onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + -----/\----- EXCLUDED -----/\----- */ // If writing the whole line set all write enables to 1, else only set the correct word. - assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND - assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR + assign SelectedWriteWordEn = SetValidWay | SetDirtyWay;// ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND + assign FinalByteMask = SetValidWay ? '1 : LineByteMask; // OR ///////////////////////////////////////////////////////////////////////////////////////////// // Tag Array @@ -107,15 +112,9 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // Data Array ///////////////////////////////////////////////////////////////////////////////////////////// - // *** instantiate one larger RAM, not one per RAM. Expand byte mask genvar words; - logic [BYTESPERLINE-1:0] ReplicatedByteMask, SRAMLineByteMask, WordByteEnabled; - assign ReplicatedByteMask = {{WORDSPERLINE}{FinalByteMask}}; - for(words = 0; words < WORDSPERLINE; words++) - assign WordByteEnabled[BYTESPERWORD*(words+1)-1:BYTESPERWORD*(words)] = {{BYTESPERWORD}{SelectedWriteWordEn[words]}}; - assign SRAMLineByteMask = ReplicatedByteMask & WordByteEnabled; - localparam integer SRAMLEN = 256; + localparam integer SRAMLEN = 128; localparam integer NUMSRAM = LINELEN/SRAMLEN; localparam integer SRAMLENINBYTES = SRAMLEN/8; localparam integer LOGNUMSRAM = $clog2(NUMSRAM); @@ -124,28 +123,9 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .Adr(RAdr), .ReadData(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), .CacheWriteData(CacheWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), - .WriteEnable(1'b1), .ByteMask(SRAMLineByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); + //.WriteEnable(1'b1), .ByteMask(SRAMLineByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); + .WriteEnable(SelectedWriteWordEn), .ByteMask(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); end - - -/* -----\/----- EXCLUDED -----\/----- - for(words = 0; words < 1; words++) begin: word - sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(LINELEN)) CacheDataMem(.clk, .Adr(RAdr), - .ReadData(ReadDataLine), - .CacheWriteData(CacheWriteData), - .WriteEnable(1'b1), .ByteMask(SRAMLineByteMask)); - end - -----/\----- EXCLUDED -----/\----- */ - - -/* -----\/----- EXCLUDED -----\/----- - for(words = 0; words < WORDSPERLINE; words++) begin: word - sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr), - .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), - .CacheWriteData(CacheWriteData[(words+1)*`XLEN-1:words*`XLEN]), - .WriteEnable(SelectedWriteWordEn[words]), .ByteMask(FinalByteMask)); - end - -----/\----- EXCLUDED -----/\----- */ // AND portion of distributed read multiplexers mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelEvict}, SelData); From 37bf837d48deb9cef6839d13d6405cfb128ea5c7 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 19 Jul 2022 08:59:16 -0700 Subject: [PATCH 053/138] fixed GPIO test by adding a new function to clear PLIC interrupts --- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 39 +++++++++++++++++++ .../rv32i_m/privilege/src/WALLY-gpio-01.S | 4 ++ 2 files changed, 43 insertions(+) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 0caad5d0b..7e112c917 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -973,6 +973,45 @@ readsip_test: // read the MIP into the signature addi a6, a6, 4 j test_loop // go to next test case +claim_m_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C002000 + li t3, 0x0C200004 + li t4, 0xFFF + lw t6, 0(t2) // save current enable status + sw t4, 0(t2) // enable all relevant interrupts on PLIC + lw t5, 0(t3) // make PLIC claim + sw t5, 0(t3) // complete claim made + sw t6, 0(t2) // restore saved enable status + j test_loop + +claim_s_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C002080 + li t3, 0x0C201004 + li t4, 0xFFF + lw t6, 0(t2) // save current enable status + sw t4, 0(t2) // enable all relevant interrupts on PLIC + lw t5, 0(t3) // make PLIC claim + sw t5, 0(t3) // complete claim made + sw t6, 0(t2) // restore saved enable status + j test_loop + +uart_data_wait: + li t2, 0x10000005 // LSR + li t3, 0x10000002 // IIR + lb t4, 0(t3) // save IIR before potential clear + lb t5, 0(t2) + andi t5, t5, 1 // only care if data is ready + li t6, 1 + beq t5, t6, uart_data_ready + j uart_data_wait + +uart_data_ready: + sb t4, 0(t1) + sb t5, 1(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior (go to supervisor mode) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index 1b3bbdb47..8fb801feb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -140,18 +140,22 @@ SETUP_PLIC .4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17 +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte low_ie, 0x00010000, write32_test # enable low interrupt on bit 16, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte low_ie, 0x00000000, write32_test # disable low interrupt on bit 16 +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte rise_ie, 0x00200000, write32_test # enable rise interrupt on bit 21, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte rise_ie, 0x00000000, write32_test # disable rise interrupt on bit 21, which is pending +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte fall_ie, 0x01000000, write32_test # enable high interrupt on bit 24, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte fall_ie, 0x00000000, write32_test # disable high interrupt on bit 24, which is pending +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests From 0668659ac9dedc3990e3cfc37db9365d7b59e16c Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 19 Jul 2022 13:17:02 -0700 Subject: [PATCH 054/138] made changes to makefile, now builds fastest version (RV64im) by default. Also removed redundent CFLAG funroll-all-loops (was duplicated) --- benchmarks/coremark/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 1b619d80e..e41865986 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -10,9 +10,9 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)i +ARCH := rv$(XLEN)im PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ - -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \ + -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions -falign-jumps=4 \ -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ From 5b1adc7a675f2ddaa85e57c64a19ce3df96b4e03 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 19 Jul 2022 13:36:18 -0700 Subject: [PATCH 055/138] commented out embench 2.0 tests --- pipelined/testbench/tests.vh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 4b1b9a160..8a2765701 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -49,24 +49,24 @@ string tvpaths[] = '{ `EMBENCH, "bd_speedopt_speed/src/aha-mont64/aha-mont64", "bd_speedopt_speed/src/crc32/crc32", - "bd_speedopt_speed/src/cubic/cubic", + "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches "bd_speedopt_speed/src/edn/edn", "bd_speedopt_speed/src/huffbench/huffbench", "bd_speedopt_speed/src/matmult-int/matmult-int", - "bd_speedopt_speed/src/md5sum/md5sum", + // "bd_speedopt_speed/src/md5sum/md5sum", //commenting out tests from embench 2.0. When embench 2.0 launches stabilty, add these tests back "bd_speedopt_speed/src/minver/minver", "bd_speedopt_speed/src/nbody/nbody", "bd_speedopt_speed/src/nettle-aes/nettle-aes", "bd_speedopt_speed/src/nettle-sha256/nettle-sha256", "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", - "bd_speedopt_speed/src/primecount/primecount", + // "bd_speedopt_speed/src/primecount/primecount", "bd_speedopt_speed/src/qrduino/qrduino", "bd_speedopt_speed/src/sglib-combined/sglib-combined", "bd_speedopt_speed/src/slre/slre", "bd_speedopt_speed/src/st/st", "bd_speedopt_speed/src/statemate/statemate", - "bd_speedopt_speed/src/tarfind/tarfind", + // "bd_speedopt_speed/src/tarfind/tarfind", "bd_speedopt_speed/src/ud/ud", "bd_speedopt_speed/src/wikisort/wikisort", "bd_sizeopt_speed/src/aha-mont64/aha-mont64", @@ -75,7 +75,7 @@ string tvpaths[] = '{ "bd_sizeopt_speed/src/edn/edn", "bd_sizeopt_speed/src/huffbench/huffbench", "bd_sizeopt_speed/src/matmult-int/matmult-int", - "bd_sizeopt_speed/src/md5sum/md5sum", + // "bd_sizeopt_speed/src/md5sum/md5sum", "bd_sizeopt_speed/src/minver/minver", "bd_sizeopt_speed/src/nbody/nbody", "bd_sizeopt_speed/src/nettle-aes/nettle-aes", @@ -88,7 +88,7 @@ string tvpaths[] = '{ "bd_sizeopt_speed/src/slre/slre", "bd_sizeopt_speed/src/st/st", "bd_sizeopt_speed/src/statemate/statemate", - "bd_sizeopt_speed/src/tarfind/tarfind", + // "bd_sizeopt_speed/src/tarfind/tarfind", "bd_sizeopt_speed/src/ud/ud", "bd_sizeopt_speed/src/wikisort/wikisort" }; From 20800b2714d9d95daa4c8b1f81ec5a29767fa0cc Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 19 Jul 2022 15:16:12 -0700 Subject: [PATCH 056/138] embench no longer launches run automatiacally, need to use make run --- benchmarks/embench/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 255ea8be5..630864ded 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -4,7 +4,8 @@ embench_dir = ../../addins/embench-iot -all: build sim size +all: build +run: sim size allClean: clean all From d61f84e7510aa98ad3aae1af8d45994d532ad74f Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 19 Jul 2022 23:44:37 +0000 Subject: [PATCH 057/138] oprimized zeros and replaced complex ?: with always_comb --- pipelined/src/fpu/fcvt.sv | 21 ++++++--- pipelined/src/fpu/fmashiftcalc.sv | 4 -- pipelined/src/fpu/negateintres.sv | 9 +++- pipelined/src/fpu/resultsign.sv | 25 ++++++++--- pipelined/src/fpu/shiftcorrection.sv | 5 ++- pipelined/src/fpu/specialcase.sv | 65 ++++++++++++++++++---------- 6 files changed, 85 insertions(+), 44 deletions(-) diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index b9932523a..9d7f2d62d 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -103,7 +103,7 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} : {Xm, {`CVTLEN-`NF{1'b0}}}; assign LzcIn = LzcInFull[`CVTLEN-1:0]; @@ -125,9 +125,10 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : - ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros); + always_comb + if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}}; + else if (ResDenormUf&~IntToFp) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; + else ShiftAmt = LeadingZeros; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -150,7 +151,9 @@ module fcvt ( assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); end else if (`FPSIZES == 2) begin - assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + logic [`NE-2:0] NewBiasToFp; + assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; end else if (`FPSIZES == 3) begin logic [`NE-2:0] NewBiasToFp; @@ -177,7 +180,7 @@ module fcvt ( // select the old exponent // int -> fp : largest bias + XLEN // fp -> ??? : XExp - assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe; + assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe; // calculate CalcExp // fp -> fp : @@ -222,7 +225,11 @@ module fcvt ( // - if 64-bit : check the msb of the 64-bit integer input and if it's signed // - if 32-bit : check the msb of the 32-bit integer input and if it's signed // - otherwise: the floating point input's sign - assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs; + always_comb + if(IntToFp) + if(Int64) Cs = Int[`XLEN-1]&Signed; + else Cs = Int[31]&Signed; + else Cs = Xs; endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 79953b212..d598efb7e 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -42,7 +42,6 @@ module fmashiftcalc( output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero ); - logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias logic [`NE+1:0] BiasCorr; @@ -149,9 +148,6 @@ module fmashiftcalc( // Determine if the result is denormal // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero; - // Determine the shift needed for denormal results - // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/negateintres.sv b/pipelined/src/fpu/negateintres.sv index dde515b94..7a696b379 100644 --- a/pipelined/src/fpu/negateintres.sv +++ b/pipelined/src/fpu/negateintres.sv @@ -42,7 +42,12 @@ module negateintres( // round and negate the positive res if needed assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; - assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] : - Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32]; + always_comb + if(Signed) + if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN:`XLEN-1]; + else CvtNegResMsbs = CvtNegRes[32:31]; + else + if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN+1:`XLEN]; + else CvtNegResMsbs = CvtNegRes[33:32]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index b8019b98b..c2912ece7 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -46,11 +46,21 @@ module resultsign( logic Zeros; logic Infs; - // Determine the sign if the sum is zero - // if cancelation then 0 unless round to -infinity - // if multiply then Psgn - // otherwise psign - assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + // The IEEE754-2019 standard specifies: + // - the sign of an exact zero sum (with operands of diffrent signs) should be positive unless rounding toward negitive infinity + // - when the exact result of an FMA opperation is non-zero, but is zero due to rounding, use the sign of the exact result + // - if x = +0 or -0 then x+x=x and x-(-x)=x + // - the sign of a product is the exclisive or or the opperand's signs + // Zero sign will only be selected if: + // - P=Z and a cancelation occurs - exact zero + // - Z is zero and P is zero - exact zero + // - P is killed and Z is zero - Psgn + // - Z is killed and P is zero - impossible + // Zero sign calculation: + // - if a multiply opperation is done, then use the products sign(Ps) + // - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign) + // - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign + assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive @@ -58,6 +68,9 @@ module resultsign( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign Infs = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms; + always_comb + if(InfIn&FmaOp) Ws = Infs; + else if(FmaSZero&FmaOp) Ws = Zeros; + else Ws = Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 50cffb078..514edbee1 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -55,7 +55,10 @@ module shiftcorrection( // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + always_comb + if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; + else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted; + else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv index 3c28eae2e..6014962a1 100644 --- a/pipelined/src/fpu/specialcase.sv +++ b/pipelined/src/fpu/specialcase.sv @@ -95,9 +95,14 @@ module specialcase( end else begin assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end - - assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : - OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + + always_comb + if(OutFmt) + if(OfResMax) OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; + else OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + else + if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; + else OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; @@ -234,20 +239,21 @@ module specialcase( assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); // output infinity with result sign if divide by zero - if(`IEEE754) begin - assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes : - YNaN&~CvtOp ? YNaNRes : - ZNaN&FmaOp ? ZNaNRes : - Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end else begin - assign PostProcRes = NaNIn|Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end + if(`IEEE754) + always_comb + if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes; + else if(YNaN&~CvtOp) PostProcRes = YNaNRes; + else if(ZNaN&FmaOp) PostProcRes = ZNaNRes; + else if(Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; + else + always_comb + if(NaNIn|Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; /////////////////////////////////////////////////////////////////////////////////////// // @@ -272,10 +278,17 @@ module specialcase( // unsigned | 2^32-1 | 2^64-1 | // // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive - Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive - {`XLEN{1'b1}};// unsigned positive + always_comb + if(Signed) + if(Xs&~XNaN) // signed negitive + if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; + else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; + else // signed positive + if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; + else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; + else + if(Xs&~XNaN) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive + else OfIntRes = {`XLEN{1'b1}}; // unsigned positive // select the integer output @@ -284,7 +297,11 @@ module specialcase( // - if rounding and signed opperation and negitive input, output -1 // - otherwise output a rounded 0 // - otherwise output the normal res (trmined and sign extended if nessisary) - assign FCvtIntRes = IntInvalid ? OfIntRes : - CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? - Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; + always_comb + if(IntInvalid) FCvtIntRes = OfIntRes; + else if(CvtCe[`NE]) + if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}}; + else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1}; + else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0]; + else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; endmodule \ No newline at end of file From 0f94177765c6a47ff7c468d2136f180803de623d Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 20 Jul 2022 01:36:25 +0000 Subject: [PATCH 058/138] small changes --- pipelined/src/fpu/divsqrt.sv | 2 +- pipelined/src/fpu/srt.sv | 12 ++++++------ pipelined/srt/srt.sv | 29 +++++++++++++++++------------ 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index cbf7f95f0..ffc60026b 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -41,7 +41,7 @@ module divsqrt( input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, + input logic StallE, output logic DivStickyM, output logic DivBusy, output logic DivDone, diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 9e0315113..ee5ae9a39 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -34,18 +34,18 @@ module srt( input logic clk, input logic DivStart, input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, + input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1-(`RADIX/4):0] Quot, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem ); diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 949335bf0..13a59d848 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -55,7 +55,7 @@ module srt ( logic qp, qz, qn; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; - logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn; + logic [`DIVLEN+3:0] X, Dpreproc, C, F, S, SM, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic intSign; @@ -90,8 +90,9 @@ module srt ( // If only implementing division, use divide otfc // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot); // otherwise use sotfc - creg sotfcC(clk, Start, C); - sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F); + creg sotfcC(clk, Start, Sqrt, C); + sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM); + fsel2 fsel(qp, qn, C, S, SM, F); // Adder input selection assign AddIn = Sqrt ? F : Dsel; @@ -214,11 +215,16 @@ module fsel2 ( // Generate for both positive and negative bits assign FP = ~S & C; assign FN = SM | (C & (~C << 2)); - assign FZ = {(`DIVLEN+4){1'b0}}; + assign FZ = '0; // Choose which adder input will be used - assign F = sp ? FP : (sn ? FN : FZ); + always_comb + if (sp) F = FP; + else if (sn) F = FN; + else F = FZ; + + // assign F = sp ? FP : (sn ? FN : FZ); endmodule @@ -266,17 +272,18 @@ module sotfc2( input logic clk, input logic Start, input logic sp, sn, + input logic Sqrt, input logic [`DIVLEN+3:0] C, output logic [`DIVLEN-2:0] Sq, - output logic [`DIVLEN+3:0] F + output logic [`DIVLEN+3:0] S, SM ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. // Use this otfc for division and square root. - logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; + logic [`DIVLEN+3:0] SNext, SMNext, SMux; flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin @@ -292,9 +299,6 @@ module sotfc2( end end assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; - - fsel2 fsel(sp, sn, C, S, SM, F); - endmodule ////////////////////////// @@ -302,11 +306,12 @@ endmodule ////////////////////////// module creg(input logic clk, input logic Start, + input logic Sqrt, output logic [`DIVLEN+3:0] C ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From 36bd17984bf50b9da89424a69ba32d788ba45c49 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 20 Jul 2022 01:49:33 +0000 Subject: [PATCH 059/138] Reordered embench Makefile to run size tests first --- benchmarks/embench/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 630864ded..21c30cf6e 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -5,7 +5,7 @@ embench_dir = ../../addins/embench-iot all: build -run: sim size +run: size sim allClean: clean all From c3a4a2abdf14068e85653f22a6e8a9221775ba44 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 20 Jul 2022 02:00:43 +0000 Subject: [PATCH 060/138] New radix-2 algorithm implemented and working --- pipelined/srt/srt.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 13a59d848..ed55ddd8d 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -75,7 +75,7 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], Sqrt, qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); @@ -154,7 +154,7 @@ module srtpreproc ( // Selecting correct divider inputs assign DivX = Int ? PreprocA : PreprocX; - assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; + assign SqrtX = XExp[0] ? {5'b11101, SrcXFrac} : {4'b1111, SrcXFrac, 1'b0}; assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; @@ -169,13 +169,13 @@ endmodule ///////////////////////////////// // Quotient Selection, Radix 2 // ///////////////////////////////// -module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN-1] ps, pc, +module qsel2 ( + input logic [`DIVLEN+3:`DIVLEN] ps, pc, input logic Sqrt, output logic qp, qz, qn ); - logic [`DIVLEN+3:`DIVLEN-1] p, g; + logic [`DIVLEN+3:`DIVLEN] p, g; logic magnitude, sign, cout; // The quotient selection logic is presented for simplicity, not @@ -186,8 +186,8 @@ module qsel2 ( // *** eventually just change to 4 bits assign p = ps ^ pc; assign g = ps & pc; - assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1]))))); + assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); + assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN]))); assign #1 sign = p[`DIVLEN+3] ^ cout; /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & (ps[52]^pc[52])); @@ -283,7 +283,7 @@ module sotfc2( logic [`DIVLEN+3:0] SNext, SMNext, SMux; flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {2'b00, Sqrt, {(`DIVLEN+1){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin @@ -298,7 +298,7 @@ module sotfc2( SMNext = SM | ((C << 1) & ~(C << 2)); end end - assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; + assign Sq = S[`DIVLEN+1] ? S[`DIVLEN:2] : S[`DIVLEN-1:1]; endmodule ////////////////////////// @@ -311,7 +311,7 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From cce57fdcc569fd7631c37b27dcbf3a1d1ff928b5 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 20 Jul 2022 02:04:20 +0000 Subject: [PATCH 061/138] divsqrt working for floating point --- pipelined/srt/srt.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index ed55ddd8d..27fac324e 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -311,7 +311,7 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b11111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From b26297e87490c5f6e986967a4d89d36d6ccb901a Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 20 Jul 2022 02:27:39 +0000 Subject: [PATCH 062/138] moved ctrl signal registers into fctrl, also a lot of code cleaning --- pipelined/src/fpu/divsqrt.sv | 20 +- pipelined/src/fpu/fclassify.sv | 38 +-- pipelined/src/fpu/fcmp.sv | 138 ++++++----- pipelined/src/fpu/fctrl.sv | 51 +++- pipelined/src/fpu/fcvt.sv | 12 +- pipelined/src/fpu/fhazard.sv | 28 +-- pipelined/src/fpu/fma.sv | 18 +- pipelined/src/fpu/fmashiftcalc.sv | 8 +- pipelined/src/fpu/fpu.sv | 336 +++++++++++++-------------- pipelined/src/fpu/fsgninj.sv | 45 ++-- pipelined/src/fpu/otfc.sv | 2 +- pipelined/src/fpu/postprocess.sv | 20 +- pipelined/src/fpu/qsel.sv | 2 +- pipelined/src/fpu/shiftcorrection.sv | 6 +- pipelined/src/fpu/srt.sv | 26 +-- pipelined/src/fpu/srtfsm.sv | 6 +- pipelined/src/fpu/unpack.sv | 45 ++-- pipelined/src/fpu/unpackinput.sv | 38 +-- 18 files changed, 439 insertions(+), 400 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index ffc60026b..a2f0ba8e3 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -34,20 +34,20 @@ module divsqrt( input logic clk, input logic reset, input logic [`FMTBITS-1:0] FmtE, - input logic [`NF:0] XManE, YManE, - input logic [`NE-1:0] XExpE, YExpE, + input logic [`NF:0] XmE, YmE, + input logic [`NE-1:0] XeE, YeE, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, - output logic DivStickyM, + input logic StallE, + output logic DivSM, output logic DivBusy, output logic DivDone, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] QeM, output logic [`DURLEN-1:0] EarlyTermShiftM, - output logic [`QLEN-1-(`RADIX/4):0] QuotM + output logic [`QLEN-1-(`RADIX/4):0] QmM // output logic [`XLEN-1:0] RemM, ); @@ -60,10 +60,10 @@ module divsqrt( logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); + srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index 6c7ab451f..6aaec00a4 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -29,29 +29,29 @@ `include "wally-config.vh" module fclassify ( - input logic XSgnE, // sign bit - input logic XNaNE, // is NaN - input logic XSNaNE, // is signaling NaN - input logic XDenormE, // is denormal - input logic XZeroE, // is zero - input logic XInfE, // is infinity - output logic [`XLEN-1:0] ClassResE // classify result - ); + input logic Xs, // sign bit + input logic XNaN, // is NaN + input logic XSNaN, // is signaling NaN + input logic XDenorm,// is denormal + input logic XZero, // is zero + input logic XInf, // is infinity + output logic [`XLEN-1:0] ClassRes// classify result +); logic PInf, PZero, PNorm, PDenorm; logic NInf, NZero, NNorm, NDenorm; - logic XNormE; + logic XNorm; // determine the sub categories - assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE); - assign PInf = ~XSgnE&XInfE; - assign NInf = XSgnE&XInfE; - assign PNorm = ~XSgnE&XNormE; - assign NNorm = XSgnE&XNormE; - assign PDenorm = ~XSgnE&XDenormE; - assign NDenorm = XSgnE&XDenormE; - assign PZero = ~XSgnE&XZeroE; - assign NZero = XSgnE&XZeroE; + assign XNorm= ~(XNaN | XInf| XDenorm| XZero); + assign PInf = ~Xs&XInf; + assign NInf = Xs&XInf; + assign PNorm = ~Xs&XNorm; + assign NNorm = Xs&XNorm; + assign PDenorm = ~Xs&XDenorm; + assign NDenorm = Xs&XDenorm; + assign PZero = ~Xs&XZero; + assign NZero = Xs&XZero; // determine sub category and combine into the result // bit 0 - -Inf @@ -64,6 +64,6 @@ module fclassify ( // bit 7 - +Inf // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; + assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; endmodule diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv index 9c6757848..48ff536f6 100755 --- a/pipelined/src/fpu/fcmp.sv +++ b/pipelined/src/fpu/fcmp.sv @@ -27,9 +27,10 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. //////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" -// FOpCtrlE values +// OpCtrl values // 110 min // 101 max // 010 equal @@ -37,36 +38,32 @@ // 011 less than or equal module fcmp ( - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single - input logic [2:0] FOpCtrlE, // see above table - input logic XSgnE, YSgnE, // input signs - input logic [`NE-1:0] XExpE, YExpE, // input exponents - input logic [`NF:0] XManE, YManE, // input mantissa - input logic XZeroE, YZeroE, // is zero - input logic XNaNE, YNaNE, // is NaN - input logic XSNaNE, YSNaNE, // is signaling NaN - input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs - output logic CmpNVE, // invalid flag - output logic [`FLEN-1:0] CmpFpResE, // compare resilt - output logic [`XLEN-1:0] CmpIntResE // compare resilt + input logic [`FMTBITS-1:0] Fmt, // format of fp number + input logic [2:0] OpCtrl, // see above table + input logic Xs, Ys, // input signs + input logic [`NE-1:0] Xe, Ye, // input exponents + input logic [`NF:0] Xm, Ym, // input mantissa + input logic XZero, YZero, // is zero + input logic XNaN, YNaN, // is NaN + input logic XSNaN, YSNaN, // is signaling NaN + input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker) + output logic CmpNV, // invalid flag + output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result + output logic [`XLEN-1:0] CmpIntRes // compare integer result ); - logic LTabs, LT, EQ; // is X < or > or = Y - logic [`FLEN-1:0] NaNRes; - logic BothZero, EitherNaN, EitherSNaN; + logic LTabs, LT, EQ; // is X < or > or = Y + logic [`FLEN-1:0] NaNRes; // NaN result + logic BothZero; // are both inputs zero + logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN - assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers - assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs); - // assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression + assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers + assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison + assign EQ = (X == Y); - //assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]}); - //assign LT = XInt < YInt; -// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE Y + else // MIN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y + + // LT/LE/EQ + // - -0 = 0 + // - inf = inf and -inf = -inf + // - return 0 if comparison with NaN (unordered) + assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN}; endmodule diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 5c553e864..85047248d 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -29,25 +29,41 @@ `include "wally-config.vh" module fctrl ( + input logic clk, + input logic reset, + input logic StallE, StallM, StallW, // stall signals + input logic FlushE, FlushM, FlushW, // flush signals + input logic [31:0] InstrD, input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision input logic [6:0] OpD, // bits 6:0 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? + input logic FDivBusyE, // is the divider busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - output logic FRegWriteD, // FP register write enable - output logic FDivStartD, // Start division or squareroot - output logic [1:0] FResSelD, // select result to be written to fp register - output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit - output logic [1:0] PostProcSelD, - output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1 - output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - output logic FWriteIntD // is the result written to the integer register + output logic FRegWriteM, FRegWriteW, // FP register write enable + output logic [2:0] FrmM, // FP rounding mode + output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format + output logic DivStartE, // Start division or squareroot + output logic FWriteIntE, FWriteIntM, // Write to integer register + output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component + output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage + output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit + output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input ); `define FCTRLW 11 logic [`FCTRLW-1:0] ControlsD; + logic FRegWriteD; // FP register write enable + logic DivStartD; // integer register write enable + logic FWriteIntD; // integer register write enable + logic FRegWriteE; // FP register write enable + logic [2:0] OpCtrlD; // Select which opperation to do in each component + logic [1:0] PostProcSelD; // select result in the post processing unit + logic [1:0] FResSelD; // Select one of the results that finish in the memory stage + logic [2:0] FrmD, FrmE; // FP rounding mode + logic [`FMTBITS-1:0] FmtD; // FP format //*** will putting x for don't cares reduce area in synthisis??? // FPU Instruction Decoder always_comb @@ -130,7 +146,7 @@ module fctrl ( endcase // unswizzle control bits - assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD; + assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -168,7 +184,7 @@ module fctrl ( // 10 fma // Other Sel: -// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]} +// Ctrl signal = {OpCtrl[2], &FOpctrl[1:0]} // 000 - sign 00 // 001 - negate sign 00 // 010 - xor sign 00 @@ -205,5 +221,20 @@ module fctrl ( // 01 - negate sign // 10 - xor sign + // D/E pipleine register + flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); + // E/M pipleine register + flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}, + {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM}); + // M/W pipleine register + flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResSelM}, + {FRegWriteW, FResSelW}); endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 9d7f2d62d..d2967887f 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -35,7 +35,7 @@ module fcvt ( input logic [`NE-1:0] Xe, // input's exponent input logic [`NF:0] Xm, // input's fraction input logic [`XLEN-1:0] Int, // integer input - from IEU - input logic [2:0] FOpCtrl, // choose which opperation (look below for values) + input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic ToInt, // is fp->int (since it's writting to the integer register) input logic XZero, // is the input zero input logic XDenorm, // is the input denormalized @@ -73,17 +73,17 @@ module fcvt ( // seperate OpCtrl for code readability - assign Signed = FOpCtrl[0]; - assign Int64 = FOpCtrl[1]; - assign IntToFp = FOpCtrl[2]; + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output // - int -> fp: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); + assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; + assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv index ca31d904f..36a0ff82f 100644 --- a/pipelined/src/fpu/fhazard.sv +++ b/pipelined/src/fpu/fhazard.sv @@ -31,20 +31,20 @@ `include "wally-config.vh" module fhazard( - input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses - input logic FRegWriteM, FRegWriteW, // is the fp register being written to - input logic [4:0] RdM, RdW, // the adress being written to - input logic [1:0] FResSelM, // the result being selected + input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses + input logic FRegWriteM, FRegWriteW, // is the fp register being written to + input logic [4:0] RdM, RdW, // the adress being written to + input logic [1:0] FResSelM, // the result being selected output logic FStallD, // stall the decode stage - output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value + output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value ); always_comb begin // set defaults - FForwardXE = 2'b00; // choose FRD1E - FForwardYE = 2'b00; // choose FRD2E - FForwardZE = 2'b00; // choose FRD3E + ForwardXE = 2'b00; // choose FRD1E + ForwardYE = 2'b00; // choose FRD2E + ForwardZE = 2'b00; // choose FRD3E FStallD = 0; //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait @@ -52,28 +52,28 @@ module fhazard( // if the needed value is in the memory stage - input 1 if ((Adr1E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W + else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 2 if ((Adr2E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W + else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 3 if ((Adr3E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W + else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 3f4cc2ac5..067147ee6 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -34,7 +34,7 @@ module fma( input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format input logic XZero, YZero, ZZero, // is the input zero - input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign output logic ZmSticky, // sticky bit that is calculated during alignment @@ -46,7 +46,7 @@ module fma( output logic Ps, // the product's sign output logic Ss, // the sum's sign output logic [`NE+1:0] Se, - output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count ); logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format @@ -72,7 +72,7 @@ module fma( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// // calculate the signs and take the opperation into account - sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ZmSticky, .KillProd); @@ -85,7 +85,7 @@ module fma( add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); + loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); endmodule @@ -120,7 +120,7 @@ endmodule module sign( - input logic [2:0] FOpCtrl, // opperation contol + input logic [2:0] OpCtrl, // opperation contol input logic Xs, Ys, Zs, // sign of the inputs output logic Ps, // the product's sign - takes opperation into account output logic As // aligned addend sign used in fma - takes opperation into account @@ -130,9 +130,9 @@ module sign( // Negate product's sign if FNMADD or FNMSUB // flip is negation opperation - assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]); + assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); // flip if subtraction - assign As = Zs^FOpCtrl[0]; + assign As = Zs^OpCtrl[0]; endmodule @@ -275,7 +275,7 @@ endmodule module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend input logic [2*`NF+3:0] P, // product - output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); logic [3*`NF+6:0] T; @@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt)); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index d598efb7e..7464149f6 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -32,7 +32,7 @@ module fmashiftcalc( input logic [3*`NF+5:0] FmaSm, // the positive sum input logic [`NE-1:0] Ze, // exponent of Z input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias - input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count + input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero input logic [`NE+1:0] FmaSe, @@ -52,7 +52,7 @@ module fmashiftcalc( // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -152,7 +152,7 @@ module fmashiftcalc( // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; if (`FPSIZES == 1) - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1; else - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1; endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index a9c0ac247..6d9b9cf47 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -30,28 +30,28 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic [31:0] InstrD, // instruction from IFU - input logic [`FLEN-1:0] ReadDataW,// Read data from memory - input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU) - input logic StallE, StallM, StallW, // stall signals from HZU - input logic FlushE, FlushM, FlushW, // flush signals from HZU - input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) - input logic [1:0] STATUS_FS, // Is floating-point enabled? - output logic FRegWriteM, // FP register write enable - output logic FpLoadStoreM, // Fp load instruction? - output logic FStore2, - output logic FStallD, // Stall the decode stage - output logic FWriteIntE, // integer register write enables - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory - output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register - output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register - output logic [1:0] FResSelW, - output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode (from CSR) + input logic [31:0] InstrD, // instruction (from IFU) + input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) + input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU) + input logic StallE, StallM, StallW, // stall signals (from HZU) + input logic FlushE, FlushM, FlushW, // flush signals (from HZU) + input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) + input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) + output logic FRegWriteM, // FP register write enable (to privileged unit) + output logic FpLoadStoreM, // Fp load instruction? (to LSU) + output logic FStore2, // store two words into memory (to LSU) + output logic FStallD, // Stall the decode stage (To HZU) + output logic FWriteIntE, // integer register write enable (to IEU) + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN + output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) + output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) + output logic [1:0] FResSelW, // final result selection (to IEU) + output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit) output logic [4:0] SetFflagsM // FPU flags (to privileged unit) ); @@ -62,99 +62,88 @@ module fpu ( // - sets the underflow after rounding // control signals - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division or squareroot - logic FWriteIntD; // Write to integer register - logic FWriteIntM; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage - logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + logic FRegWriteW; // FP register write enable + logic [2:0] FrmM; // FP rounding mode + logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double + logic DivStartE; // Start division or squareroot + logic FWriteIntM; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals + logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input // regfile signals - logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding) - logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) - logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) + logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) + logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) + logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [`NE-1:0] ZExpM; // input's exponent - memory stage - logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XNaNQ, YNaNQ; // is the input a NaN - divide - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XZeroQ, YZeroQ; // is the input zero - divide - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XInfQ, YInfQ; // is the input infinity - divide - logic XExpMaxE; // is the exponent all ones (max value) - logic FmtQ; - logic FOpCtrlQ; + logic XsE, YsE, ZsE; // input's sign - execute stage + logic XsM, YsM; // input's sign - memory stage + logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage + logic [`NE-1:0] ZeM; // input's exponent - memory stage + logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage + logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XNaNQ, YNaNQ; // is the input a NaN - divide + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XExpMaxE; // is the exponent all ones (max value) // Fma Signals - logic [3*`NF+5:0] SumE, SumM; - logic [`NE+1:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic [`NE+1:0] SeE,SeM; - logic KillProdE, KillProdM; - logic InvAE, InvAM; - logic NegSumE, NegSumM; - logic ZSgnEffE, ZSgnEffM; - logic PSgnE, PSgnM; - logic SsE, SsM; - logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; + logic [3*`NF+5:0] SmE, SmM; + logic [`NE+1:0] PeE, PeM; + logic ZmStickyE, ZmStickyM; + logic [`NE+1:0] SeE,SeM; + logic KillProdE, KillProdM; + logic InvAE, InvAM; + logic NegSumE, NegSumM; + logic AsE, AsM; + logic PsE, PsM; + logic SsE, SsM; + logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM; // Cvt Signals - logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic [`NE:0] CeE, CeM; // the calculated expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized - logic CvtResSgnE, CvtResSgnM; // the result's sign + logic CsE, CsM; // the result's sign logic IntZeroE, IntZeroM; // is the integer zero? - logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`QLEN-1-(`RADIX/4):0] QuotM; - logic [`NE+1:0] DivCalcExpE, DivCalcExpM; - logic DivStickyE, DivStickyM; - logic DivDoneM; - logic [`DURLEN-1:0] EarlyTermShiftM; + logic [`QLEN-1-(`RADIX/4):0] QmM; + logic [`NE+1:0] QeE, QeM; + logic DivSE, DivSM; + logic DivDoneM; + logic [`DURLEN-1:0] EarlyTermShiftM; // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM; // divide/squareroot flags - logic [`FLEN-1:0] ReadResW; // read result (load instruction) - logic [`XLEN-1:0] ClassResE; // classify result - logic [`XLEN-1:0] FIntResE; // classify result - logic [`FLEN-1:0] FpResM, FpResW; // classify result - logic [`FLEN-1:0] PostProcResM; // classify result - logic [4:0] PostProcFlgM; // classify result + logic [`XLEN-1:0] ClassResE; // classify result + logic [`XLEN-1:0] FIntResE; // classify result + logic [`FLEN-1:0] FpResM, FpResW; // classify result + logic [`FLEN-1:0] PostProcResM; // classify result + logic [4:0] PostProcFlgM; // classify result logic [`XLEN-1:0] FCvtIntResM; - logic [`FLEN-1:0] CmpFpResE; // compare result - logic [`XLEN-1:0] CmpIntResE; // compare result - logic CmpNVE; // compare invalid flag (Not Valid) - logic [`FLEN-1:0] SgnResE; // sign injection result - logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage - logic PreNVE, PreNVM; // selected flag that is ready in the memory stage - logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register + logic [`FLEN-1:0] CmpFpResE; // compare result + logic [`XLEN-1:0] CmpIntResE; // compare result + logic CmpNVE; // compare invalid flag (Not Valid) + logic [`FLEN-1:0] SgnResE; // sign injection result + logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage + logic PreNVE, PreNVM; // selected flag that is ready in the memory stage + logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic load_preload; // enable for FF on fpdivsqrt - logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format + logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed @@ -171,9 +160,11 @@ module fpu ( ////////////////////////////////////////////////////////////////////////////////////////// // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS, - .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, - .FmtD, .FrmD, .FWriteIntD); + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, + .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, + .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, + .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, + .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); // FP register file fregfile fregfile (.clk, .reset, .we4(FRegWriteW), @@ -185,12 +176,6 @@ module fpu ( flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); - flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); // EXECUTION STAGE @@ -207,12 +192,12 @@ module fpu ( // Hazard unit for FPU // - determines if any forwarding or stalls are needed fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, - .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); + .FStallD, .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs - mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE); - mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE); - mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE); + mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE); + mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE); + mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE); generate @@ -227,7 +212,7 @@ module fpu ( endgenerate - mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions + mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions // Force Z to be 0 for multiply instructions generate @@ -241,55 +226,76 @@ module fpu ( (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes endgenerate - mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); + mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE); // unpack unit // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); + unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), + .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), + .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), + .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), + .ZInf(ZInfE), .XExpMax(XExpMaxE)); - // fma - does multiply, add, and multiply-add instructions - fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), - .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), - .Xm(XManE), .Ym(YManE), .Zm(ZManE), + // fused multiply add + // - fadd/fsub + // - fmul + // - fmadd/fnmadd/fmsub/fnmsub + fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), + .Xm(XmE), .Ym(YmE), .Zm(ZmE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), - .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE), - .Sm(SumE), .Pe(ProdExpE), - .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), - .ZmSticky(AddendStickyE), .KillProd(KillProdE)); + .OpCtrl(OpCtrlE), .Fmt(FmtE), + .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), + .Sm(SmE), .Pe(PeE), + .NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE), + .ZmSticky(ZmStickyE), .KillProd(KillProdE)); - divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), - .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal - .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); - // other FP execution units - fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, - .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); - fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); - fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE); + // divide and squareroot + // - fdiv + // - fsqrt + // *** add other opperations + divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), + .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal + .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); + // compare + // - fmin/fmax + // - flt/fle/feq + fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), + .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), + .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); + // sign injection + // - fsgnj/fsgnjx/fsgnjn + fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE)); - fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), - .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), - .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), + // classify + // - fclass + fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE), + .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); + + // convert + // - fcvt.*.* + fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), + .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE), + .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); // data to be stored in memory - to IEU // - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s if (`LLEN==`XLEN) begin - assign FWriteDataE = FSrcYE[`XLEN-1:0]; + assign FWriteDataE = YE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; else assign FStore2 = FmtM; - if (`FPSIZES==1) assign FWriteDataE = FSrcYE; - else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; - else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; + if (`FPSIZES==1) assign FWriteDataE = YE; + else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}}; + else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}}; flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); end @@ -306,14 +312,14 @@ module fpu ( {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes endgenerate // select a result that may be written to the FP register - mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE); - assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE); + mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU if (`FLEN>`XLEN) - assign IntSrcXE = FSrcXE[`XLEN-1:0]; + assign IntSrcXE = XE[`XLEN-1:0]; else - assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}; + assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE}; mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok @@ -321,27 +327,24 @@ module fpu ( // E/M pipe registers - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); - flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); - flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM); + flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM}); + flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM}); + flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); - flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); - flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM); flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM}); + {ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE}, + {ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, - {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, - {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); + {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE}, + {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM}); // BEGIN MEMORY STAGE @@ -357,11 +360,11 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM), - .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), + postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM), + .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), + .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged @@ -371,9 +374,6 @@ module fpu ( // M/W pipe registers flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); - flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResSelM, FmtM}, - {FRegWriteW, FResSelW, FmtW}); // BEGIN WRITEBACK STAGE diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv index 17d15669f..a5b7e7742 100755 --- a/pipelined/src/fpu/fsgninj.sv +++ b/pipelined/src/fpu/fsgninj.sv @@ -26,60 +26,59 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. //////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" module fsgninj ( - input logic XSgnE, YSgnE, // X and Y sign bits - input logic [`FLEN-1:0] FSrcXE, // X - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single - input logic [1:0] SgnOpCodeE, // operation control - output logic [`FLEN-1:0] SgnResE // result + input logic Xs, Ys, // X and Y sign bits + input logic [`FLEN-1:0] X, // X + input logic [`FMTBITS-1:0] Fmt, // format + input logic [1:0] OpCtrl, // operation control + output logic [`FLEN-1:0] SgnRes // result ); logic ResSgn; - //op code designation: - // - //00 - fsgnj - directly copy over sign value of FSrcYE - //01 - fsgnjn - negate sign value of FSrcYE - //10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE - // + // OpCtrl: + // 00 - fsgnj - directly copy over sign value of Y + // 01 - fsgnjn - negate sign value of Y + // 10 - fsgnjx - XOR sign values of X and Y // calculate the result's sign - assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE; + assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys; // format final result based on precision // - uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s if (`FPSIZES == 1) - assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]}; + assign SgnRes = {ResSgn, X[`FLEN-2:0]}; else if (`FPSIZES == 2) - assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]}; + assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]}; else if (`FPSIZES == 3) begin logic [2:0] SgnBits; always_comb - case (FmtE) - `FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]}; - `FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]}; + case (Fmt) + `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]}; + `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; `FMT2: SgnBits = {2'b11, ResSgn}; default: SgnBits = {3{1'bx}}; endcase - assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]}; + assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]}; end else if (`FPSIZES == 4) begin logic [3:0] SgnBits; always_comb - case (FmtE) - `Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; - `D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; - `S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]}; + case (Fmt) + `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]}; + `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]}; + `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]}; `H_FMT: SgnBits = {3'b111, ResSgn}; endcase - assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]}; + assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]}; end endmodule diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 8d11273a2..66af5b3c5 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -107,6 +107,6 @@ module otfc4 ( QMNext = {QMR, 2'b11}; end end - // Final Quoteint is in the range [.5, 2) + // Final Qmeint is in the range [.5, 2) endmodule diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index de3c4f30c..d3169d471 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -36,7 +36,7 @@ module postprocess ( input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [2:0] FOpCtrl, // choose which opperation (look below for values) + input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic XZero, YZero, ZZero, // inputs are zero input logic XInf, YInf, ZInf, // inputs are infinity input logic XNaN, YNaN, ZNaN, // inputs are NaN @@ -54,7 +54,7 @@ module postprocess ( input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z input logic FmaSs, - input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count + input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivS, @@ -125,14 +125,14 @@ module postprocess ( logic Sqrt; // signals to help readability - assign Signed = FOpCtrl[0]; - assign Int64 = FOpCtrl[1]; - assign IntToFp = FOpCtrl[2]; - assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0]; + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; + assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; assign CvtOp = (PostProcSel == 2'b00); assign FmaOp = (PostProcSel == 2'b10); assign DivOp = (PostProcSel == 2'b01)&DivDone; - assign Sqrt = FOpCtrl[0]; + assign Sqrt = OpCtrl[0]; // is there an input of infinity or NaN being used assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); @@ -142,9 +142,9 @@ module postprocess ( // - fp -> fp: OpCtrl contains the percision of the output // - otherwise: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); + assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; + assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -152,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 396ca7761..202b3ee81 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits // for efficiency. You can probably optimize your logic to // select the proper divisor with less delay. - // Quotient equations from EE371 lecture notes 13-20 + // Qmient equations from EE371 lecture notes 13-20 assign p = ps ^ pc; assign g = ps & pc; diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 514edbee1..6329ffe28 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -43,7 +43,7 @@ module shiftcorrection( output logic [`NE+1:0] FmaMe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; + logic [`CORRSHIFTSZ-1:0] CorrQmShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction @@ -53,11 +53,11 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits always_comb if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; - else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted; + else if (DivOp&~DivResDenorm) Mf = CorrQmShifted; else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index ee5ae9a39..7e9f9922a 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -37,15 +37,15 @@ module srt( input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, - output logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, + output logic [`QLEN-1-(`RADIX/4):0] Qm, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] QeM, output logic [`XLEN-1:0] Rem ); @@ -62,7 +62,7 @@ module srt( /* verilator lint_on UNOPTFLAT */ logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] DivCalcExp; + logic [`NE+1:0] Qe; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; logic [`QLEN-1:0] QMMux; @@ -88,7 +88,7 @@ module srt( mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); + flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM); // Divisor Selections @@ -123,7 +123,7 @@ module srt( flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; if(`RADIX==2) @@ -132,7 +132,7 @@ module srt( else assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; - expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); + expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe); endmodule @@ -155,7 +155,7 @@ module divinteration ( logic [3:0] q; logic qp, qz;//, qn; - // Quotient Selection logic + // Qmient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) // q encoding: // 1000 = +2 @@ -226,7 +226,7 @@ module expcalc( input logic [`NE-1:0] Xe, Ye, input logic XZeroE, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] DivCalcExp + output logic [`NE+1:0] Qe ); logic [`NE-2:0] Bias; @@ -255,5 +255,5 @@ module expcalc( endcase end // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 634ecc1d3..597f96cd7 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -43,7 +43,7 @@ module srtfsm( input logic [`DIVLEN+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, output logic [`DURLEN-1:0] EarlyTermShiftE, - output logic DivStickyE, + output logic DivSE, output logic DivDone, output logic NegSticky, output logic DivBusy @@ -65,9 +65,9 @@ module srtfsm( // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant // radix-4 division can't create a QM that continually adds 0's if (`RADIX == 2) - assign DivStickyE = |W&~(StickyWSA == WS); + assign DivSE = |W&~(StickyWSA == WS); else - assign DivStickyE = |W; + assign DivSE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 71cad1872..050839c2f 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -30,35 +30,34 @@ module unpack ( input logic [`FLEN-1:0] X, Y, Z, // inputs from register file - input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half - output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ - output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) - output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN - output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, ZDenormE, // is XYZ denormalized - output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero - output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + output logic Xs, Ys, Zs, // sign bits of XYZ + output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) + output logic XNaN, YNaN, ZNaN, // is XYZ a NaN + output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN + output logic XDenorm, ZDenorm, // is XYZ denormalized + output logic XZero, YZero, ZZero, // is XYZ zero + output logic XInf, YInf, ZInf, // is XYZ infinity + output logic XExpMax // does X have the maximum exponent (NaN or Inf) ); - logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero - logic YExpMaxE, ZExpMaxE; // is the exponent all 1s + logic YExpMax, ZExpMax; // is the exponent all 1s - unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), - .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero), - .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero)); + unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), + .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), + .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero)); - unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), - .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero), - .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero)); + unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), + .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), + .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero)); - unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), - .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero), - .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero)); + unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), + .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), + .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); // is the input denormalized - assign XDenormE = ~XExpNonZero & ~XFracZero; - assign ZDenormE = ~ZExpNonZero & ~ZFracZero; + assign XDenorm = ~XExpNonZero & ~XFracZero; + assign ZDenorm = ~ZExpNonZero & ~ZFracZero; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 2b078cc6c..7be922508 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -30,7 +30,7 @@ module unpackinput ( input logic [`FLEN-1:0] In, // inputs from register file - input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of XYZ output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) @@ -74,16 +74,16 @@ module unpackinput ( // quad and half // double and half - assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing + assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1]; + assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1]; // extract the fraction, add trailing zeroes to the mantissa if nessisary - assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; + assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; // is the exponent non-zero - assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; + assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; // example double to single conversion: // 1023 = 0011 1111 1111 @@ -95,10 +95,10 @@ module unpackinput ( // extract the exponent, converting the smaller exponent into the larger precision if nessisary // - if the original precision had a denormal number convert the exponent value 1 - assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; + assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; // is the exponent all 1's - assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; + assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; end else if (`FPSIZES == 3) begin // three floating point precsions supported @@ -122,7 +122,7 @@ module unpackinput ( // Check NaN boxing always_comb - case (FmtE) + case (Fmt) `FMT: BadNaNBox = 0; `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; @@ -131,7 +131,7 @@ module unpackinput ( // extract the sign bit always_comb - case (FmtE) + case (Fmt) `FMT: Sgn = In[`FLEN-1]; `FMT1: Sgn = In[`LEN1-1]; `FMT2: Sgn = In[`LEN2-1]; @@ -140,7 +140,7 @@ module unpackinput ( // extract the fraction always_comb - case (FmtE) + case (Fmt) `FMT: Frac = In[`NF-1:0]; `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; @@ -149,7 +149,7 @@ module unpackinput ( // is the exponent non-zero always_comb - case (FmtE) + case (Fmt) `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) @@ -166,7 +166,7 @@ module unpackinput ( // convert the larger precision's exponent to use the largest precision's bias always_comb - case (FmtE) + case (Fmt) `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; @@ -175,7 +175,7 @@ module unpackinput ( // is the exponent all 1's always_comb - case (FmtE) + case (Fmt) `FMT: ExpMax = &In[`FLEN-2:`NF]; `FMT1: ExpMax = &In[`LEN1-2:`NF1]; `FMT2: ExpMax = &In[`LEN2-2:`NF2]; @@ -194,7 +194,7 @@ module unpackinput ( // Check NaN boxing always_comb - case (FmtE) + case (Fmt) 2'b11: BadNaNBox = 0; 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; @@ -203,7 +203,7 @@ module unpackinput ( // extract sign bit always_comb - case (FmtE) + case (Fmt) 2'b11: Sgn = In[`Q_LEN-1]; 2'b01: Sgn = In[`D_LEN-1]; 2'b00: Sgn = In[`S_LEN-1]; @@ -213,7 +213,7 @@ module unpackinput ( // extract the fraction always_comb - case (FmtE) + case (Fmt) 2'b11: Frac = In[`Q_NF-1:0]; 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; @@ -222,7 +222,7 @@ module unpackinput ( // is the exponent non-zero always_comb - case (FmtE) + case (Fmt) 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; @@ -240,7 +240,7 @@ module unpackinput ( // convert the double precsion exponent into quad precsion always_comb - case (FmtE) + case (Fmt) 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; @@ -250,7 +250,7 @@ module unpackinput ( // is the exponent all 1's always_comb - case (FmtE) + case (Fmt) 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; 2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; 2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; From 869879907779a3332ab504dfd5bff76df132a754 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 19 Jul 2022 22:42:25 -0500 Subject: [PATCH 063/138] Reverted to fetched the demand cache line first then doing the eviction. This is important because of an optimization in the replacement policy. The replacement policy updates the LRU 1 cycle late and reads the LRU 1 cycle late for critical path timing. This means doing the eviction first requires an initial 1 cycle delay but this delay has to be applied to all misses because we don't know if an eviction is required. Since reading the demand line first is logically ok so long as it is not written to the sram until after the eviction. --- pipelined/regression/wave.do | 6 ++++-- pipelined/src/cache/cachefsm.sv | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pipelined/regression/wave.do b/pipelined/regression/wave.do index ed3ba8ade..d11c248fd 100644 --- a/pipelined/regression/wave.do +++ b/pipelined/regression/wave.do @@ -316,8 +316,10 @@ add wave -noupdate -expand -group lsu -expand -group dcache -group status /testb add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode @@ -515,7 +517,7 @@ add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/Upd add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/FinalByteMask} add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/SelectedWriteWordEn} TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {307575 ns} 1} {{Cursor 2} {307965 ns} 1} {{Cursor 3} {307661 ns} 0} +WaveRestoreCursors {{Cursor 5} {307575 ns} 1} {{Cursor 2} {307965 ns} 1} {{Cursor 3} {112734 ns} 0} quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 314 @@ -531,4 +533,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {0 ns} {3158332 ns} +WaveRestoreZoom {112550 ns} {112886 ns} diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 3aa736121..890854014 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -141,10 +141,12 @@ module cachefsm STATE_READY: if(IgnoreRequest) NextState = STATE_READY; else if(DoFlush) NextState = STATE_FLUSH; else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY; - else if(DoAnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; // change - else if(DoAnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; // change + else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; + //else if(DoAnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; // change + //else if(DoAnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; // change else NextState = STATE_READY; - STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; + STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; + else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; else NextState = STATE_MISS_FETCH_WDV; STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; STATE_MISS_READ_WORD: if(CacheRW[0] & ~AMO) NextState = STATE_MISS_WRITE_WORD; @@ -153,9 +155,11 @@ module cachefsm else NextState = STATE_READY; STATE_MISS_WRITE_WORD: if(CPUBusy) NextState = STATE_CPU_BUSY; else NextState = STATE_READY; - STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // start needed for the delayed lru update. - STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_EVICT_DIRTY_DONE; + STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_EVICT_DIRTY; + STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // start needed for the delayed lru update. +// STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_EVICT_DIRTY_DONE; +// else NextState = STATE_MISS_EVICT_DIRTY; STATE_MISS_EVICT_DIRTY_DONE: NextState = STATE_MISS_FETCH_WDV; STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY; else NextState = STATE_READY; @@ -199,7 +203,7 @@ module cachefsm assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls - assign SelEvict = (CurrState == STATE_READY & DoAnyMiss & VictimDirty) | + assign SelEvict = //(CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty) | (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_EVICT_DIRTY); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | @@ -213,9 +217,9 @@ module cachefsm assign FlushAdrCntRst = (CurrState == STATE_READY); assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls - assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss & ~VictimDirty) | - (CurrState == STATE_MISS_EVICT_DIRTY_DONE); - assign CacheWriteLine = (CurrState == STATE_READY & DoAnyMiss & VictimDirty) | + assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss); +// assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_WDV & VictimDirty & CacheBusAck) | + assign CacheWriteLine = (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); // handle cpu stall. assign restore = ((CurrState == STATE_CPU_BUSY)) & ~`REPLAY; From 1cad05fef9a9fcbd0c3f683fa2123fd3de218e48 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 19 Jul 2022 22:56:01 -0500 Subject: [PATCH 064/138] Minor cleanup of cache. --- pipelined/src/cache/cachefsm.sv | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 890854014..b877bd184 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -141,9 +141,7 @@ module cachefsm STATE_READY: if(IgnoreRequest) NextState = STATE_READY; else if(DoFlush) NextState = STATE_FLUSH; else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY; - else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; - //else if(DoAnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; // change - //else if(DoAnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; // change + else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // fetch first, then eviction if necessary. see delay in lru read/write path. else NextState = STATE_READY; STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; @@ -157,10 +155,7 @@ module cachefsm else NextState = STATE_READY; STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_EVICT_DIRTY; - STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // start needed for the delayed lru update. -// STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_EVICT_DIRTY_DONE; -// else NextState = STATE_MISS_EVICT_DIRTY; - STATE_MISS_EVICT_DIRTY_DONE: NextState = STATE_MISS_FETCH_WDV; + STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY; else NextState = STATE_READY; STATE_FLUSH: NextState = STATE_FLUSH_CHECK; @@ -184,7 +179,6 @@ module cachefsm (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_EVICT_DIRTY) | - (CurrState == STATE_MISS_EVICT_DIRTY_DONE) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. (CurrState == STATE_MISS_READ_WORD) | (CurrState == STATE_FLUSH) | @@ -203,8 +197,7 @@ module cachefsm assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls - assign SelEvict = //(CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty) | - (CurrState == STATE_MISS_EVICT_DIRTY_START) | + assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_EVICT_DIRTY); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | @@ -218,7 +211,6 @@ module cachefsm assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss); -// assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_WDV & VictimDirty & CacheBusAck) | assign CacheWriteLine = (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); // handle cpu stall. From a30d9c6bd8783761b9d9c57f3efd51c82eda4ab6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 20 Jul 2022 21:57:23 +0000 Subject: [PATCH 065/138] turn off 2 word store durring non-fp instructions --- pipelined/src/fpu/fctrl.sv | 26 +++++++++++++++++++++++++- pipelined/src/fpu/fpu.sv | 7 ++++--- pipelined/src/fpu/unpackinput.sv | 1 + 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 85047248d..ea98651b7 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -41,7 +41,7 @@ module fctrl ( input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? input logic FDivBusyE, // is the divider busy - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + output logic IllegalFPUInstrD, IllegalFPUInstrM, // Is the instruction an illegal fpu instruction output logic FRegWriteM, FRegWriteW, // FP register write enable output logic [2:0] FrmM, // FP rounding mode output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format @@ -55,6 +55,7 @@ module fctrl ( `define FCTRLW 11 logic [`FCTRLW-1:0] ControlsD; + logic IllegalFPUInstrE; logic FRegWriteD; // FP register write enable logic DivStartD; // integer register write enable logic FWriteIntD; // integer register write enable @@ -171,6 +172,25 @@ module fctrl ( else if (`FPSIZES == 3|`FPSIZES == 4) assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; + +// // signals to help readability +// assign IntToFp = OpCtrl[2]; +// assign CvtOp = (PostProcSelE == 2'b00)&(FResSelE == 2'b01); +// assign FmaOp = (PostProcSelE == 2'b10)&(FResSelE == 2'b01); +// assign Sqrt = OpCtrl[0]; + +// // is there an input of infinity or NaN being used +// assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); +// assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); + +// // enables: +// // X - all except int->fp, store, load, mv int->fp +// // Y - all except cvt, mv, load, class +// // Z - fma ops only +// assign XEnE = ; +// assign YEnE = ~((FResSel==2'b10)); +// assign ZEnE = FmaOp&~OpCtrlE[2]; + // Final Res Sel: // fp int // 00 other cmp @@ -228,10 +248,14 @@ module fctrl ( flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); + if(`FLEN>`XLEN) + flopenrc #(1) DEIllegalReg(clk, reset, FlushE, ~StallE, IllegalFPUInstrD, IllegalFPUInstrE); // E/M pipleine register flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}, {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM}); + if(`FLEN>`XLEN) + flopenrc #(1) EMIllegalReg(clk, reset, FlushM, ~StallM, IllegalFPUInstrE, IllegalFPUInstrM); // M/W pipleine register flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW, {FRegWriteM, FResSelM}, diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 6d9b9cf47..84f8e04df 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -72,6 +72,7 @@ module fpu ( logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + logic IllegalFPUInstrM; // regfile signals logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -163,7 +164,7 @@ module fpu ( fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, - .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, + .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); // FP register file @@ -290,8 +291,8 @@ module fpu ( assign FWriteDataE = YE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; - if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; - else assign FStore2 = FmtM; + if(`FMTBITS == 2) assign FStore2 = (FmtM == `FMT)&~IllegalFPUInstrM; + else assign FStore2 = FmtM&~IllegalFPUInstrM; if (`FPSIZES==1) assign FWriteDataE = YE; else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}}; diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 7be922508..699d88958 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -30,6 +30,7 @@ module unpackinput ( input logic [`FLEN-1:0] In, // inputs from register file + // input logic En, // enable the input input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of XYZ output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) From 7950a675ea77fe516515a15d2639df330648d2c3 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 21 Jul 2022 01:20:06 +0000 Subject: [PATCH 066/138] added input enables and improved forwarding --- pipelined/src/fpu/fctrl.sv | 29 ++++----- pipelined/src/fpu/fhazard.sv | 42 +++++++------ pipelined/src/fpu/flags.sv | 5 +- pipelined/src/fpu/fpu.sv | 14 +++-- pipelined/src/fpu/postprocess.sv | 6 +- pipelined/src/fpu/specialcase.sv | 4 +- pipelined/src/fpu/unpack.sv | 7 ++- pipelined/src/fpu/unpackinput.sv | 6 +- pipelined/testbench/testbench-fp.sv | 95 +++++++++++++++-------------- 9 files changed, 107 insertions(+), 101 deletions(-) diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index ea98651b7..5b6b22ef0 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -46,6 +46,8 @@ module fctrl ( output logic [2:0] FrmM, // FP rounding mode output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format output logic DivStartE, // Start division or squareroot + output logic XEnE, YEnE, ZEnE, + output logic YEnForwardE, ZEnForwardE, output logic FWriteIntE, FWriteIntM, // Write to integer register output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage @@ -173,23 +175,18 @@ module fctrl ( assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; -// // signals to help readability -// assign IntToFp = OpCtrl[2]; -// assign CvtOp = (PostProcSelE == 2'b00)&(FResSelE == 2'b01); -// assign FmaOp = (PostProcSelE == 2'b10)&(FResSelE == 2'b01); -// assign Sqrt = OpCtrl[0]; -// // is there an input of infinity or NaN being used -// assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); -// assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); - -// // enables: -// // X - all except int->fp, store, load, mv int->fp -// // Y - all except cvt, mv, load, class -// // Z - fma ops only -// assign XEnE = ; -// assign YEnE = ~((FResSel==2'b10)); -// assign ZEnE = FmaOp&~OpCtrlE[2]; +// enables: +// X - all except int->fp, store, load, mv int->fp +// Y - all except cvt, mv, load, class +// Z - fma ops only +// load/store mv int->fp cvt int->fp + assign XEnE = ~(((FResSelE==2'b10)&~FWriteIntE)|((FResSelE==2'b11)&FRegWriteE)|((FResSelE==2'b01)&(PostProcSelE==2'b00)&OpCtrlE[2])); +// load/class mv cvt + assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign ZEnE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&(~OpCtrlE[2]|OpCtrlE[1]); + assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign ZEnForwardE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&~OpCtrlE[2]; // Final Res Sel: // fp int diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv index 36a0ff82f..690e04ebb 100644 --- a/pipelined/src/fpu/fhazard.sv +++ b/pipelined/src/fpu/fhazard.sv @@ -35,6 +35,7 @@ module fhazard( input logic FRegWriteM, FRegWriteW, // is the fp register being written to input logic [4:0] RdM, RdW, // the adress being written to input logic [1:0] FResSelM, // the result being selected + input logic XEnE, YEnE, ZEnE, output logic FStallD, // stall the decode stage output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value ); @@ -47,33 +48,34 @@ module fhazard( ForwardZE = 2'b00; // choose FRD3E FStallD = 0; - //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait - // if the needed value is in the memory stage - input 1 - if ((Adr1E == RdM) & FRegWriteM) - // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM - else FStallD = 1; // otherwise stall - // if the needed value is in the writeback stage - else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W + if(XEnE) + if ((Adr1E == RdM) & FRegWriteM) + // if the result will be FResM (can be taken from the memory stage) + if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM + else FStallD = 1; // otherwise stall + // if the needed value is in the writeback stage + else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 2 - if ((Adr2E == RdM) & FRegWriteM) - // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM - else FStallD = 1; // otherwise stall - // if the needed value is in the writeback stage - else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W + if(YEnE) + if ((Adr2E == RdM) & FRegWriteM) + // if the result will be FResM (can be taken from the memory stage) + if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM + else FStallD = 1; // otherwise stall + // if the needed value is in the writeback stage + else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 3 - if ((Adr3E == RdM) & FRegWriteM) - // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM - else FStallD = 1; // otherwise stall - // if the needed value is in the writeback stage - else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W + if(ZEnE) + if ((Adr3E == RdM) & FRegWriteM) + // if the result will be FResM (can be taken from the memory stage) + if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM + else FStallD = 1; // otherwise stall + // if the needed value is in the writeback stage + else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 6b1bc6381..c169ab2fa 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -37,7 +37,6 @@ module flags( input logic NaNIn, // is a NaN input being used input logic [`FMTBITS-1:0] OutFmt, // output format input logic XZero, YZero, // inputs are zero - input logic XNaN, YNaN, // inputs are NaN input logic Sqrt, // Sqrt? input logic ToInt, // convert to integer input logic IntToFp, // convert integer to floating point @@ -153,11 +152,11 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); + assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); // | // or when the positive res rounds up out of range assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); - assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~XNaN & ~YNaN) | (XZero & YInf) | (YZero & XInf); + assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf); assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt); assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 84f8e04df..cfa46b657 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -73,6 +73,8 @@ module fpu ( logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input logic IllegalFPUInstrM; + logic XEnE, YEnE, ZEnE; + logic YEnForwardE, ZEnForwardE; // regfile signals logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -163,8 +165,8 @@ module fpu ( // calculate FP control signals fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, - .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, + .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE, + .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); // FP register file @@ -193,7 +195,7 @@ module fpu ( // Hazard unit for FPU // - determines if any forwarding or stalls are needed fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, - .FStallD, .ForwardXE, .ForwardYE, .ForwardZE); + .XEnE, .YEnE(YEnForwardE), .ZEnE(ZEnForwardE), .FStallD, .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE); @@ -233,11 +235,11 @@ module fpu ( // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), - .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), - .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), + .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), - .ZInf(ZInfE), .XExpMax(XExpMaxE)); + .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE)); // fused multiply add // - fadd/fsub diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index d3169d471..4d9dc310f 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -135,8 +135,8 @@ module postprocess ( assign Sqrt = OpCtrl[0]; // is there an input of infinity or NaN being used - assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); - assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); + assign InfIn = XInf|YInf|ZInf; + assign NaNIn = XNaN|YNaN|ZNaN; // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output @@ -219,7 +219,7 @@ module postprocess ( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, - .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, + .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv index 6014962a1..41e75110f 100644 --- a/pipelined/src/fpu/specialcase.sv +++ b/pipelined/src/fpu/specialcase.sv @@ -280,14 +280,14 @@ module specialcase( // other: 32 bit unsinged res should be sign extended as if it were a signed number always_comb if(Signed) - if(Xs&~XNaN) // signed negitive + if(Xs&~NaNIn) // signed negitive if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; else // signed positive if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; else - if(Xs&~XNaN) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive + if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive else OfIntRes = {`XLEN{1'b1}}; // unsigned positive diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 050839c2f..4053cba13 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -31,6 +31,7 @@ module unpack ( input logic [`FLEN-1:0] X, Y, Z, // inputs from register file input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic XEn, YEn, ZEn, output logic Xs, Ys, Zs, // sign bits of XYZ output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) @@ -46,15 +47,15 @@ module unpack ( logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic YExpMax, ZExpMax; // is the exponent all 1s - unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), + unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero)); - unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), + unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero)); - unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), + unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); // is the input denormalized diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 699d88958..4e43768c4 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -30,7 +30,7 @@ module unpackinput ( input logic [`FLEN-1:0] In, // inputs from register file - // input logic En, // enable the input + input logic En, // enable the input input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of XYZ output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) @@ -263,8 +263,8 @@ module unpackinput ( // Output logic assign FracZero = ~|Frac; // is the fraction zero? assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand - assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN? + assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? - assign Inf = ExpMax & FracZero; // is the input infinity? + assign Inf = ExpMax & FracZero &En; // is the input infinity? assign Zero = ~ExpNonZero & FracZero; // is the input zero? endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 91ce82616..19b637478 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -66,9 +66,9 @@ module testbenchfp; logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags logic AnsNaN, ResNaN, NaNGood; - logic XSgn, YSgn, ZSgn; // sign of the inputs - logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs - logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs + logic Xs, Ys, Zs; // sign of the inputs + logic [`NE-1:0] Xe, Ye, Ze; // exponent of the inputs + logic [`NF:0] Xm, Ym, Zm; // mantissas of the inputs logic XNaN, YNaN, ZNaN; // is the input NaN logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN logic XDenorm, ZDenorm; // is the input denormalized @@ -99,7 +99,7 @@ module testbenchfp; logic [`NE+1:0] Se; logic ZmSticky; logic KillProd; - logic [$clog2(3*`NF+7)-1:0] NCnt; + logic [$clog2(3*`NF+7)-1:0] SCnt; logic [3*`NF+5:0] Sm; logic InvA; logic NegSum; @@ -650,14 +650,14 @@ module testbenchfp; // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, - .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), - .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal), - .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart, - .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), - .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), - .XDenormE(XDenorm), .ZDenormE(ZDenorm), - .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), - .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax), + .Xs, .Ys, .Zs, .Unit(UnitVal), + .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), + .Xm, .Ym, .Zm, .DivStart, + .XNaN, .YNaN, .ZNaN, + .XSNaN, .YSNaN, .ZSNaN, + .XDenorm, .ZDenorm, + .XZero, .YZero, .ZZero, + .XInf, .YInf, .ZInf, .XExpMax, .X, .Y, .Z); @@ -673,34 +673,34 @@ module testbenchfp; /////////////////////////////////////////////////////////////////////////////////////////////// // instantiate devices under test - fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), - .Xe(XExp), .Ye(YExp), .Ze(ZExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), + fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), + .Xe(Xe), .Ye(Ye), .Ze(Ze), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .XZero, .YZero, .ZZero, .Ss, .Se, - .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, + .OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .SCnt, .As, .Ps, .Pe, .ZmSticky, .KillProd); - postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), - .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), + postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), + .Ze(Ze), .ZDenorm(ZDenorm), .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se), - .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaSCnt(SCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); - fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), - .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero, + fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), + .XZero(XZero), .XDenorm(XDenorm), .OpCtrl(OpCtrlVal), .IntZero, .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); - fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), - .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), - .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), + fcmp fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye, + .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes), + .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes)); + divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), - .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp), - .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone); + .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp), + .EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone); assign CmpFlg[3:0] = 0; @@ -868,10 +868,10 @@ end // Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but // the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff... - else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) | - (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | - (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | - (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin + else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[`XLEN-1:0] === (`XLEN)'(0))) | + (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | + (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | + (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); @@ -924,18 +924,19 @@ module readvectors ( output logic [`FLEN-1:0] Ans, output logic [`XLEN-1:0] SrcA, output logic [4:0] AnsFlg, - output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ - output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) - output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN - output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, ZDenormE, // is XYZ denormalized - output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero - output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE, + output logic Xs, Ys, Zs, // sign bits of XYZ + output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) + output logic XNaN, YNaN, ZNaN, // is XYZ a NaN + output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN + output logic XDenorm, ZDenorm, // is XYZ denormalized + output logic XZero, YZero, ZZero, // is XYZ zero + output logic XInf, YInf, ZInf, // is XYZ infinity + output logic XExpMax, output logic DivStart, output logic [`FLEN-1:0] X, Y, Z ); + logic XEn, YEn, ZEn; // apply test vectors on rising edge of clk // Format of vectors Inputs(1/2/3)_AnsFlg @@ -1257,8 +1258,12 @@ module readvectors ( endcase end - unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, - .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, - .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XExpMaxE); + assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); + assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)); + assign ZEn = (Unit == `FMAUNIT); + + unpack unpack(.X, .Y, .Z, .Fmt(ModFmt), .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, + .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, + .XDenorm, .ZDenorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, + .XEn, .YEn, .ZEn, .XExpMax); endmodule \ No newline at end of file From e46e96e0805ba4a5306517720cd0ab202cec7949 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 10:14:05 -0700 Subject: [PATCH 067/138] changed the default branch of embench --- .gitmodules | 5 +++++ addins/embench-iot | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 81ed2d5f4..bf7fdaefd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,6 +17,11 @@ [submodule "addins/embench-iot"] path = addins/embench-iot url = https://github.com/embench/embench-iot + branch = embench-1.0-branch [submodule "addins/coremark"] path = addins/coremark url = https://github.com/eembc/coremark +[submodule "embench"] + branch = embench-1.0-branch +[submodule "embench-iot"] + branch = embench-1.0-branch diff --git a/addins/embench-iot b/addins/embench-iot index 261a65e0a..58ffa0c68 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9 +Subproject commit 58ffa0c68c52f291d12c5902fc787d2bca94ddf9 From 16e4260ddab1e7b038bb11cbf0531c1792767eca Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 10:15:13 -0700 Subject: [PATCH 068/138] fixed gitmodules --- .gitmodules | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index bf7fdaefd..ab45d3f96 100644 --- a/.gitmodules +++ b/.gitmodules @@ -21,7 +21,3 @@ [submodule "addins/coremark"] path = addins/coremark url = https://github.com/eembc/coremark -[submodule "embench"] - branch = embench-1.0-branch -[submodule "embench-iot"] - branch = embench-1.0-branch From 4793267bd7619c5d9433406882859cbdd2597ec4 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 21 Jul 2022 17:36:21 +0000 Subject: [PATCH 069/138] Updated Radix2 Sqrt to follow new algorithm --- pipelined/srt/srt.sv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 27fac324e..157be2e7f 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -213,8 +213,8 @@ module fsel2 ( logic [`DIVLEN+3:0] FP, FN, FZ; // Generate for both positive and negative bits - assign FP = ~S & C; - assign FN = SM | (C & (~C << 2)); + assign FP = ~(S << 1) & C; + assign FN = (SM << 1) | (C & (~C << 2)); assign FZ = '0; // Choose which adder input will be used @@ -283,22 +283,22 @@ module sotfc2( logic [`DIVLEN+3:0] SNext, SMNext, SMux; flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {2'b00, Sqrt, {(`DIVLEN+1){1'b0}}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin if (sp) begin - SNext = S | ((C << 1) & ~(C << 2)); + SNext = S | (C & ~(C << 1)); SMNext = S; end else if (sn) begin - SNext = SM | ((C << 1) & ~(C << 2)); + SNext = SM | (C & ~(C << 1)); SMNext = SM; end else begin // If sp and sn are not true, then sz is SNext = S; - SMNext = SM | ((C << 1) & ~(C << 2)); + SMNext = SM | (C & ~(C << 1)); end end - assign Sq = S[`DIVLEN+1] ? S[`DIVLEN:2] : S[`DIVLEN-1:1]; + assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; endmodule ////////////////////////// @@ -311,7 +311,7 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b11111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From 86ebdd05f0753693a1d86e27ea48f7e897e4fe90 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 21 Jul 2022 17:59:10 +0000 Subject: [PATCH 070/138] Division working too --- pipelined/srt/srt.sv | 2 +- pipelined/srt/testbench.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 157be2e7f..a7216b9ff 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -2,7 +2,7 @@ // srt.sv // // Written: David_Harris@hmc.edu 13 January 2022 -// Modified: cturek@hmc.edu June 2022 +// Modified: cturek@hmc.edu July 2022 // // Purpose: Combined Divide and Square Root Floating Point and Integer Unit // diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 39696af44..7a4e1897b 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -72,7 +72,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b1; + assign Sqrt = 1'b0; // Divider srt srt(.clk, .Start(req), @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("sqrttestvectors", Tests); + $readmemh ("testvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From fbe8bb2298413d731f70306e100e8cc3881222ec Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 21 Jul 2022 19:38:06 +0000 Subject: [PATCH 071/138] radix-4 division integrated into srt - not tested --- addins/embench-iot | 2 +- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/regression/wave-fpu.do | 2 +- pipelined/src/fpu/divsqrt.sv | 13 +-- pipelined/src/fpu/fctrl.sv | 4 +- pipelined/src/fpu/fpu.sv | 2 +- pipelined/src/fpu/otfc.sv | 70 ++++++++++++++++ pipelined/src/fpu/qsel.sv | 89 +++++++++++++++++---- pipelined/src/fpu/srt.sv | 102 +++++++++++------------- pipelined/src/fpu/srtpreproc.sv | 78 +++++++++++++++--- pipelined/testbench/testbench-fp.sv | 2 +- 11 files changed, 271 insertions(+), 95 deletions(-) diff --git a/addins/embench-iot b/addins/embench-iot index 58ffa0c68..261a65e0a 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 58ffa0c68c52f291d12c5902fc787d2bca94ddf9 +Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 015ef2611..b2abdff7b 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,7 +101,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h1 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 98c72f170..b71207e09 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -33,7 +33,7 @@ add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/intera # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* -add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* +# add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index a2f0ba8e3..7ba44a953 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -41,7 +41,8 @@ module divsqrt( input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, + input logic StallE, + input logic SqrtE, SqrtM, output logic DivSM, output logic DivBusy, output logic DivDone, @@ -55,15 +56,15 @@ module divsqrt( logic [`DIVLEN+3:0] WS, WC; logic [`DIVLEN+3:0] StickyWSA; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [`DIVLEN-1:0] X; - logic [`DIVLEN-1:0] Dpreproc; + logic [`DIVLEN+3:0] X; + logic [`DIVLEN+3:0] Dpreproc; logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM); + srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM), .Rem()); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 5b6b22ef0..20e4a0099 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -219,8 +219,8 @@ module fctrl ( // 110 - add // 111 - sub // Div: -// 0 - ??? -// 1 - ??? +// 0 - div +// 1 - sqrt // Cvt Int: {Int to Fp?, 64 bit int?, signed int?} // Cvt Fp: output format // 10 - to half diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index cfa46b657..3e214b0f1 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -259,7 +259,7 @@ module fpu ( // - fdiv // - fsqrt // *** add other opperations - divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, + divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 66af5b3c5..7ecb823e6 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -58,6 +58,41 @@ module otfc2 ( endmodule +/////////////////////////////// +// Square Root OTFC, Radix 2 // +/////////////////////////////// +module sotfc2( + input logic clk, + input logic Start, + input logic sp, sn, + input logic Sqrt, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN-2:0] Sq, + output logic [`DIVLEN+3:0] S, SM +); + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + // Use this otfc for division and square root. + logic [`DIVLEN+3:0] SNext, SMNext, SMux; + + flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); + flop #(`DIVLEN+4) Sreg(clk, SMux, S); + + always_comb begin + if (sp) begin + SNext = S | (C & ~(C << 1)); + SMNext = S; + end else if (sn) begin + SNext = SM | (C & ~(C << 1)); + SMNext = SM; + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | (C & ~(C << 1)); + end + end + assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; +endmodule module otfc4 ( input logic [3:0] q, @@ -110,3 +145,38 @@ module otfc4 ( // Final Qmeint is in the range [.5, 2) endmodule + +/////////////////////////////// +// Square Root OTFC, Radix 4 // +/////////////////////////////// +module sotfc4( + input logic [3:0] s, + input logic Sqrt, + input logic [`DIVLEN+3:0] S, SM, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN+3:0] SNext, SMNext +); + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + // Use this otfc for division and square root. + + always_comb begin + if (s[3]) begin + SNext = S | ((C << 1)&~(C << 2)); + SMNext = S | (C&~(C << 1)); + end else if (s[2]) begin + SNext = S | (C&~(C << 1)); + SMNext = S; + end else if (s[1]) begin + SNext = SM | (C&~(C << 2)); + SMNext = SM | ((C << 1)&~(C << 2)); + end else if (s[0]) begin + SNext = SM | ((C << 1)&~(C << 2)); + SMNext = SM | (C&~(C << 1)); + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | (C & ~(C << 2)); + end + end + +endmodule diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 202b3ee81..87c6a4b25 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -62,9 +62,36 @@ module qsel2 ( // *** eventually just change to 4 bits // assign #1 qn = magnitude & sign; endmodule +//////////////////////////////////// +// Adder Input Generation, Radix 2 // +//////////////////////////////////// +module fgen2 ( + input logic sp, sn, + input logic [`DIVLEN+3:0] C, S, SM, + output logic [`DIVLEN+3:0] F +); + logic [`DIVLEN+3:0] FP, FN, FZ; + + // Generate for both positive and negative bits + assign FP = ~(S << 1) & C; + assign FN = (SM << 1) | (C & (~C << 2)); + assign FZ = '0; + + // Choose which adder input will be used + + always_comb + if (sp) F = FP; + else if (sn) F = FN; + else F = FZ; + + // assign F = sp ? FP : (sn ? FN : FZ); + +endmodule + module qsel4 ( input logic [`DIVLEN+3:0] D, input logic [`DIVLEN+3:0] WS, WC, + input logic Sqrt, output logic [3:0] q ); logic [6:0] Wmsbs; @@ -91,45 +118,77 @@ module qsel4 ( else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-4) QSel4[i] = 4'b0000; else if(w2>=-13) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 1: if(w2>=14) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-15) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-5) QSel4[i] = 4'b0000; // was -6 + else if(~Sqrt&(w2>=-15)) QSel4[i] = 4'b0010; // divide case + else if( Sqrt&(w2>=-14)) QSel4[i] = 4'b0010; // sqrt case + else QSel4[i] = 4'b0001; 2: if(w2>=15) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-6) QSel4[i] = 4'b0000; else if(w2>=-16) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 3: if(w2>=16) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-18) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-17) QSel4[i] = 4'b0010; // was -18 + else QSel4[i] = 4'b0001; 4: if(w2>=18) QSel4[i] = 4'b1000; else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-6) QSel4[i] = 4'b0000; // was -8 + else if(~Sqrt&(w2>=-20)) QSel4[i] = 4'b0010; // divide case + else if( Sqrt&(w2>=-18)) QSel4[i] = 4'b0010; // sqrt case + else QSel4[i] = 4'b0001; 5: if(w2>=20) QSel4[i] = 4'b1000; else if(w2>=6) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 6: if(w2>=20) QSel4[i] = 4'b1000; else if(w2>=8) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; else if(w2>=-22) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 7: if(w2>=24) QSel4[i] = 4'b1000; + else QSel4[i] = 4'b0001; + 7: if(w2>=22) QSel4[i] = 4'b1000; // was 24 else if(w2>=8) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-24) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-23) QSel4[i] = 4'b0010; // was -24 + else QSel4[i] = 4'b0001; endcase end end assign q = QSel4[{Dmsbs,Wmsbs}]; endmodule + +//////////////////////////////////// +// Adder Input Generation, Radix 4 // +//////////////////////////////////// +module fgen4 ( + input logic [3:0] s, + input logic [`DIVLEN+3:0] C, S, SM, + output logic [`DIVLEN+3:0] F +); + logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2; + + // Generate for both positive and negative bits + assign F2 = (~S << 2) & (C << 2); + assign F1 = ~(S << 1) & C; + assign F0 = '0; + assign FN1 = (SM << 1) | (C & ~(C << 2)); + assign FN2 = (SM << 2) | ((C << 2)&~(C <<4)); + + // Choose which adder input will be used + + always_comb + if (s[3]) F = F2; + else if (s[2]) F = F1; + else if (s[1]) F = FN1; + else if (s[0]) F = FN2; + else F = F0; + + // assign F = sp ? FP : (sn ? FN : FZ); + +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 7e9f9922a..633ac1787 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -34,18 +34,17 @@ module srt( input logic clk, input logic DivStart, input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, + input logic Sqrt, + input logic [`DIVLEN+3:0] X, + input logic [`DIVLEN+3:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1-(`RADIX/4):0] Qm, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] QeM, output logic [`XLEN-1:0] Rem ); @@ -59,13 +58,19 @@ module srt( logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] S[`DIVCOPIES-1:0]; //***change to QLEN??? + logic [`DIVLEN+3:0] SM[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] SNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] SMNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] C[`DIVCOPIES-1:0]; /* verilator lint_on UNOPTFLAT */ logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] Qe; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; logic [`QLEN-1:0] QMMux; + logic [`DIVLEN+3:0] CMux; + logic [`DIVLEN+3:0] SMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -83,13 +88,13 @@ module srt( assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; end - mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); + mux2 #(`DIVLEN+4) wsmux(NextWSN, X, DivStart, WSN); flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); - flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM); - + flopen #(`DIVLEN+4) dflop(clk, DivStart, Dpreproc, D); + mux2 #(`DIVLEN+4) Cmux({2'b11, C[`DIVCOPIES-1][`DIVLEN+3:2]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, DivStart, CMux); + flop #(`DIVLEN+4) cflop(clk, CMux, C[0]); // Divisor Selections // - choose the negitive version of what's being selected @@ -102,8 +107,9 @@ module srt( genvar i; generate for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations - divinteration divinteration(.D, .DBar, .D2, .DBar2, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]), + .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); if(i<(`DIVCOPIES-1)) begin if (`RADIX==2)begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; @@ -111,9 +117,12 @@ module srt( end else begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + assign C[i+1] = {2'b11, C[i][`DIVLEN+3:2]}; end assign Q[i+1] = QNext[i]; assign QM[i+1] = QMNext[i]; + assign S[i+1] = SNext[i]; + assign SM[i+1] = SMNext[i]; end end endgenerate @@ -123,16 +132,27 @@ module srt( flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + flopr #(`DIVLEN+4) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); + mux2 #(`DIVLEN+4) Smux(SNext[`DIVCOPIES-1], {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, DivStart, SMux); + flop #(`DIVLEN+4) Sreg(clk, SMux, S[0]); + + always_comb + if(Sqrt) + if(NegSticky) Qm = SM[0][`QLEN-1-(`RADIX/4):0]; + else Qm = S[0][`QLEN-1-(`RADIX/4):0]; + else + if(NegSticky) Qm = QM[0][`QLEN-1-(`RADIX/4):0]; + else Qm = Q[0][`QLEN-1-(`RADIX/4):0]; + assign FirstWS = WS[0]; assign FirstWC = WC[0]; + if(`RADIX==2) if (`DIVCOPIES == 1) assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; else assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; - expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe); endmodule @@ -145,8 +165,12 @@ module divinteration ( input logic [`DIVLEN+3:0] D, input logic [`DIVLEN+3:0] DBar, D2, DBar2, input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] S, SM, input logic [`DIVLEN+3:0] WS, WC, + input logic [`DIVLEN+3:0] C, + input logic Sqrt, output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] SNext, SMNext, output logic [`DIVLEN+3:0] WSA, WCA ); /* verilator lint_on UNOPTFLAT */ @@ -154,6 +178,8 @@ module divinteration ( logic [`DIVLEN+3:0] Dsel; logic [3:0] q; logic qp, qz;//, qn; + logic [`DIVLEN+3:0] F; + logic [`DIVLEN+3:0] AddIn; // Qmient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -166,7 +192,8 @@ module divinteration ( if(`RADIX == 2) begin : qsel qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); end else begin - qsel4 qsel4(.D, .WS, .WC, .q); + qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q); + fgen4 fgen4(.s(q), .C, .S, .SM, .F); end if(`RADIX == 2) begin : dsel @@ -184,16 +211,18 @@ module divinteration ( end // Partial Product Generation // WSA, WCA = WS + WC - qD + assign AddIn = Sqrt ? F : Dsel; if (`RADIX == 2) begin : csa - csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); end else begin - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, |q[3:2], WSA, WCA); end if (`RADIX == 2) begin : otfc otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); end else begin otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); + sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); end endmodule @@ -220,40 +249,3 @@ module csa #(parameter N=69) ( assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | (in2[N-2:0] & in3[N-2:0]), cin}; endmodule - -module expcalc( - input logic [`FMTBITS-1:0] FmtE, - input logic [`NE-1:0] Xe, Ye, - input logic XZeroE, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] Qe - ); - logic [`NE-2:0] Bias; - - if (`FPSIZES == 1) begin - assign Bias = (`NE-1)'(`BIAS); - - end else if (`FPSIZES == 2) begin - assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - - end else if (`FPSIZES == 3) begin - always_comb - case (FmtE) - `FMT: Bias = (`NE-1)'(`BIAS); - `FMT1: Bias = (`NE-1)'(`BIAS1); - `FMT2: Bias = (`NE-1)'(`BIAS2); - default: Bias = 'x; - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (FmtE) - 2'h3: Bias = (`NE-1)'(`Q_BIAS); - 2'h1: Bias = (`NE-1)'(`D_BIAS); - 2'h0: Bias = (`NE-1)'(`S_BIAS); - 2'h2: Bias = (`NE-1)'(`H_BIAS); - endcase - end - // correct exponent for denormalized input's normalization shifts - assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; - endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index b9fb8bb82..4d2609179 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -31,16 +31,25 @@ `include "wally-config.vh" module srtpreproc ( + input logic clk, + input logic DivStart, input logic [`NF:0] Xm, Ym, - output logic [`DIVLEN-1:0] X, - output logic [`DIVLEN-1:0] Dpreproc, + input logic [`NE-1:0] Xe, Ye, + input logic [`FMTBITS-1:0] Fmt, + input logic Sqrt, + input logic XZero, + output logic [`NE+1:0] QeM, + output logic [`DIVLEN+3:0] X, + output logic [`DIVLEN+3:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`DIVLEN-1:0] PreprocA, PreprocX; - logic [`DIVLEN-1:0] PreprocB, PreprocY; + logic [`NF-1:0] PreprocA, PreprocX; + logic [`NF-1:0] PreprocB, PreprocY; + logic [`NF+3:0] SqrtX; + logic [`NE+1:0] Qe; // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; @@ -49,23 +58,22 @@ module srtpreproc ( // ***can probably merge X LZC with conversion // cout the number of leading zeros - lzc #(`NF+1) lzcA (Xm, XZeroCnt); - lzc #(`NF+1) lzcB (Ym, YZeroCnt); + lzc #(`NF+1) lzcX (Xm, XZeroCnt); + lzc #(`NF+1) lzcY (Ym, YZeroCnt); // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // assign PreprocA = ExtraA << zeroCntA; // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {Xm[`NF-1:0]< Date: Thu, 21 Jul 2022 12:47:51 -0700 Subject: [PATCH 072/138] removed ugly /ref/Ref from tests.vh, added back d_fsd-align-01.S and d_fld-align-01.S tests to tests.vh, updated makefile to fix the riscof issues and fix fld fsd tests, updated testbench.sv for comptability with changes --- pipelined/testbench/testbench.sv | 29 +- pipelined/testbench/tests.vh | 1152 ++++++++++---------- tests/riscof/Makefile | 8 +- tests/riscof/sail_cSim/riscof_sail_cSim.py | 4 +- 4 files changed, 607 insertions(+), 586 deletions(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 0fb5f5e60..30c62865e 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -195,13 +195,19 @@ logic [3:0] dummy; /* if (tests[0] == `IMPERASTEST) pathname = tvpaths[0]; else pathname = tvpaths[1]; */ - memfilename = {pathname, tests[test], ".elf.memfile"}; + if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"}; + else memfilename = {pathname, tests[test], ".elf.memfile"}; if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM); else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM); if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM); - ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; - ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; + if (riscofTest) begin + ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"}; + ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"}; + end else begin + ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; + ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; + end // declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array // to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test) updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray); @@ -241,7 +247,8 @@ logic [3:0] dummy; // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking $display("Embench Benchmark: %s is done.", tests[test]); - outputfile = {pathname, tests[test], ".sim.output"}; + if (riscofTest) outputfile = {pathname, tests[test], "/ref/ref.sim.output"}; + else outputfile = {pathname, tests[test], ".sim.output"}; outputFilePointer = $fopen(outputfile); i = 0; while ($unsigned(i) < $unsigned(5'd5)) begin @@ -256,7 +263,7 @@ logic [3:0] dummy; for(i=0; i config$(XLEN).ini -build_arch: +fsd_fld_tempfix: + # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests + # https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fld*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fsd*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + +build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser rm -rf $(arch_workdir)/rv$(XLEN)i_m mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/ diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py index b86f62b55..dc3033ab3 100644 --- a/tests/riscof/sail_cSim/riscof_sail_cSim.py +++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py @@ -90,7 +90,7 @@ class sail_cSim(pluginTemplate): test_dir = testentry['work_dir'] test_name = test.rsplit('/',1)[1][:-2] - elf = 'Ref.elf' + elf = 'ref.elf' execute = "@cd "+testentry['work_dir']+";" @@ -98,7 +98,7 @@ class sail_cSim(pluginTemplate): compile_cmd = cmd + ' -D' + " -D".join(testentry['macros']) execute+=compile_cmd+";" - execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump') + execute += self.objdump_cmd.format(elf, self.xlen, 'ref.elf.objdump') sig_file = os.path.join(test_dir, self.name[:-1] + ".signature") execute += self.sail_exe[self.xlen] + ' -z268435455 --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name) From 635a02cf6a8796e8ec106b3cdc4f4b0afad2967a Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 12:50:02 -0700 Subject: [PATCH 073/138] made makefile more specific, just incase future additions --- tests/riscof/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 259eb3f83..a9a442d38 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -20,8 +20,8 @@ root: fsd_fld_tempfix: # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests # https://github.com/riscv-non-isa/riscv-arch-test/issues/266 - find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fld*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} - find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fsd*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser From c7e84f8e40f0cdb6a61410651b307fdfa2e805d8 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 21 Jul 2022 20:45:08 +0000 Subject: [PATCH 074/138] Renamed variables, moved output handling to postprocessor, added remainder handling --- pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/srt.sv | 100 +++++++++++++++++++++++++++++-------- pipelined/srt/testbench.sv | 10 ++-- 3 files changed, 85 insertions(+), 25 deletions(-) diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen index 06615165395cb6abb37c9d60152d5fe70f15e11a..d2cb80d445718cfa8efc5daa24fefc6b5d462149 100755 GIT binary patch delta 695 zcmXxiO=uHA6ae6PsoR()?atZ;R#1#kNU;Y?6~!LxR$N>x6e;8qJj5P~^8llk64tG4?8ZfL&qtv#acVY(I2f8`~B(tn-8&JVB^% zLv8Hs>7 zZo0!)K%q3#VTtNPnpg^0EzqcJH-al(U&QYCp+v(e(*50h`-IMcWAT14@lJdYhTV_x zS%6{8wr4@O&)OwKzBjPkaU>cP3sJ|YK8;uPk)hllrHb?=Rafc)P0_QC=V$eJPOpa(G7U#@UhaWGd`dBa z?w9-@{Yv3#VQdkGFkP@FH8_nI?P>7bbvv)a aS)9o1k8M?W_#ksQW=gABWV&B6>6T65Z3JZi delta 715 zcmXw%L1+^}6o%(DjY&w8oduP8uw6xMwP1rvN=3~IUEPx+nj*a^9-sd^$XE8 z%=wj`)&Wn)BW^HXVD^~LF|RS7VgACLXWn2w%G_c;#N6rWN_gN9FZi6Wn_1}SLRsb{ z^A2Xo+~WrfGXG_^n13^mGXG@G=_^_{9bdfQR-KOf=zd4s9d#ea z7j{{~t$uuefO@(LB=rYD;!8@b2iTgYS<~4HTHbzxRyv>jG3NMPk2NOma_G8&7p1r!Mb&bjm`dZJDyX14^J@N+m zFrBzTKJZU#L3daux5$&!L!0~_X5`r6S5#%3vcyzEO0pb{541)N3?q2Z6pa0u{$5Y!oJbYc8NLkVjH(Ba?aU}L1h0y`7 diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index a7216b9ff..7cbcd0daa 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -35,33 +35,35 @@ module srt ( input logic Start, input logic Stall, // *** multiple pipe stages input logic Flush, // *** multiple pipe stages - // Floating Point Inputs - // later add exponents, signs, special cases + // Floating Point input logic XSign, YSign, input logic [`NE-1:0] XExp, YExp, input logic [`NF-1:0] SrcXFrac, SrcYFrac, + // Integer input logic [`XLEN-1:0] SrcA, SrcB, + // Customization input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic W64, // 32-bit ints on XLEN=64 + // Selection input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic rsign, done, - output logic [`DIVLEN-2:0] Rem, Quot, // *** later handle integers + output logic [`DIVLEN-1:0] Rem, Result, output logic [`NE-1:0] rExp, output logic [3:0] Flags ); - logic qp, qz, qn; // quotient is +1, 0, or -1 + logic qp, qz, qn; // result bits are +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; logic [`DIVLEN+3:0] X, Dpreproc, C, F, S, SM, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; - logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; + logic [$clog2(`XLEN+1)-1:0] zeroCntD, intExp, dur, calcDur; logic intSign; logic cin; - srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); + srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -91,7 +93,7 @@ module srt ( // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot); // otherwise use sotfc creg sotfcC(clk, Start, Sqrt, C); - sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM); + sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, S, SM); fsel2 fsel(qp, qn, C, S, SM, F); // Adder input selection @@ -103,7 +105,7 @@ module srt ( expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); - signcalc signcalc(.XSign, .YSign, .calcSign); + srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Result, .Rem, .calcSign); endmodule //////////////// @@ -123,11 +125,11 @@ module srtpreproc ( input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic [`DIVLEN+3:0] X, D, - output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent + output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent output logic intSign // Quotient integer sign ); - logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; + logic [$clog2(`XLEN+1)-1:0] zeroCntA; logic [`XLEN-1:0] PosA, PosB; logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX; logic [`NF+4:0] SqrtX; @@ -235,7 +237,7 @@ module otfc2 #(parameter N=66) ( input logic clk, input logic Start, input logic qp, qz, qn, - output logic [N-3:0] r + output logic [N-3:0] Result ); // The on-the-fly converter transfers the quotient // bits to the quotient as they come. @@ -261,7 +263,7 @@ module otfc2 #(parameter N=66) ( QMNext = {QMR, 1'b0}; end end - assign r = Q[N] ? Q[N-1:2] : Q[N-2:1]; + assign Result = Q[N] ? Q[N-1:2] : Q[N-2:1]; endmodule @@ -274,7 +276,6 @@ module sotfc2( input logic sp, sn, input logic Sqrt, input logic [`DIVLEN+3:0] C, - output logic [`DIVLEN-2:0] Sq, output logic [`DIVLEN+3:0] S, SM ); // The on-the-fly converter transfers the square root @@ -298,7 +299,6 @@ module sotfc2( SMNext = SM | (C & ~(C << 1)); end end - assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; endmodule ////////////////////////// @@ -395,14 +395,74 @@ module expcalc( endmodule -////////////// -// signcalc // -////////////// -module signcalc( - input logic XSign, YSign, +module srtpostproc( + input logic [`DIVLEN+3:0] WS, WC, X, D, S, SM, + input logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD, + input logic XSign, YSign, Signed, Int, + output logic [`DIVLEN-1:0] Result, Rem, output logic calcSign ); + logic [`DIVLEN+3:0] W, shiftRem, intRem, intS; + logic [`DIVLEN-1:0] floatRes, intRes; + logic WSign; + assign W = WS + WC; + assign WSign = W[`DIVLEN+3]; + // Remainder handling + always_comb begin + if (zeroCntD == ($clog2(`XLEN+1))'(`XLEN)) begin + intRem = X; + intS = -1; + end + else if (~Signed) begin + if (WSign) begin + intRem = W + D; + intS = SM; + end else begin + intRem = W; + intS = S; + end + end + else case ({YSign, XSign, WSign}) + 3'b000: begin + intRem = W; + intS = S; + end + 3'b001: begin + intRem = W + D; + intS = SM; + end + 3'b010: begin + intRem = W - D; + intS = ~S; + end + 3'b011: begin + intRem = W; + intS = ~SM; + end + 3'b100: begin + intRem = W; + intS = ~SM; + end + 3'b101: begin + intRem = W + D; + intS = ~SM + 1; + end + 3'b110: begin + intRem = W - D; + intS = S + 1; + end + 3'b111: begin + intRem = W; + intS = S; + end + endcase + end + assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0]; + assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0]; + assign Result = Int ? intRes : floatRes; assign calcSign = XSign ^ YSign; + assign shiftRem = intRem >>> dur; + assign Rem = shiftRem[`DIVLEN-1:0]; +endmodule -endmodule \ No newline at end of file diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 7a4e1897b..b72ffb42e 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -49,7 +49,7 @@ module testbench; logic asign, bsign; logic [`NF-1:0] r; logic [`XLEN-1:0] rInt; - logic [`DIVLEN-2:0] Quot; + logic [`DIVLEN-1:0] Quot, Rem; // Test parameters parameter MEM_SIZE = 40000; @@ -72,7 +72,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b0; + assign Sqrt = 1'b1; // Divider srt srt(.clk, .Start(req), @@ -82,7 +82,7 @@ module testbench; .SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcA(a), .SrcB(b), .Fmt(2'b00), .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, - .Quot, .Rem(), .Flags(), .done); + .Result(Quot), .Rem, .Flags(), .done); // Counter // counter counter(clk, req, done); @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("testvectors", Tests); + $readmemh ("sqrttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; @@ -117,7 +117,7 @@ module testbench; always @(posedge clk) begin r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; - rInt = {1'b1, Quot}; + rInt = Quot; if (done) begin if (~Int & ~Sqrt) begin req <= #5 1; From 8bfb23320468af2c4e814999f3508ba6d6e5b208 Mon Sep 17 00:00:00 2001 From: cturek Date: Fri, 22 Jul 2022 01:27:08 +0000 Subject: [PATCH 075/138] Changed testbench to operate on two inputs and one output, changed all test generators, changed srt module to return only one output and take in Mod as a signal to compute integer remainder --- pipelined/srt/Makefile | 8 +++- pipelined/srt/exptestgen.c | 4 -- pipelined/srt/inttestgen | Bin 18496 -> 18496 bytes pipelined/srt/inttestgen.c | 23 +++--------- pipelined/srt/modtestgen | Bin 0 -> 18496 bytes pipelined/srt/modtestgen.c | 73 ++++++++++++++++++++++++++++++++++++ pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/sqrttestgen.c | 4 -- pipelined/srt/srt-waves.do | 1 + pipelined/srt/srt.sv | 22 ++++++----- pipelined/srt/testbench.sv | 40 ++++++++++---------- 11 files changed, 118 insertions(+), 57 deletions(-) create mode 100755 pipelined/srt/modtestgen create mode 100644 pipelined/srt/modtestgen.c diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index 5d7898b17..ee249139d 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,4 +1,4 @@ -all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen +all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -o sqrttestgen -lm @@ -28,6 +28,10 @@ inttestgen: inttestgen.c gcc -lm -o inttestgen inttestgen.c ./inttestgen +modtestgen: modtestgen.c + gcc -lm -o modtestgen modtestgen.c + ./modtestgen + clean: - rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen + rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c index d6bebb774..61fe74aa4 100644 --- a/pipelined/srt/exptestgen.c +++ b/pipelined/srt/exptestgen.c @@ -96,10 +96,6 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, // Print r in standard double format fprintf(fptr, "%03x", rExp|(rSign<<11)); printhex(fptr, rFrac); - fprintf(fptr, "_"); - - // Spacing for testbench, value doesn't matter - fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); } diff --git a/pipelined/srt/inttestgen b/pipelined/srt/inttestgen index 2f7e6819945a6b827154fdc1c264eb7d77a29312..9e65cf4a031da28d084759b76e92bd9fd7e4bfc3 100755 GIT binary patch delta 1043 zcmYLIO-vI(6rR~a*(oh;5h#?VK&t#SLP35?lt>A!SwvKj5;gvCk(;I$;(>sTC6ZFq z>Kw$xgI-Kbh)VW?2W^ok#e?BMh{Q_|Hfe%lZGvf}u)b}#?Ig3``}yAY-t6oK{W$2y z{&k{JqSVqn+6ri0inQZW*iwdzzQW&2*13_1h2)Ki=j+=;4-Ioa-jLt#`FX~e$Hs^a zajVO2{)4;*%gdoEpr?_7dfZHlSbg~=pCif5b3XegH|KRkR~-pab-R<=Sfg{`8|QOI zIp27g^EqOicQtB)q{orKp2-B6`si^a!Ag>;7n%ICeL?WAtvESvg!BBY=Dd>$&N~jJ zAmbeX8E1^fSi{kqz)Nd8fUqe_cY-v?fcrUc9qeddniHm-#K0Wcrz6D34Z{3DUV=P| zvbko_F0kWjb1<&fs1QaduD-TBrmhcxpD!LMuD-Z@b!DtcT{SJ%(g3L_8_ugnPVRoA zh!@T@h!eBXb99G%ZF##V2=l;eP6~nxa1>B1EQ2;dz`6rL2m`(b6x)5LzZ56TOHgHj zD6$u5Gyu)8Hz1ZT#$X+WZHKMdqeF$YJ!e5w3&p6XNOO{V4XiUX13Bn&Qx>ZUNaPO%_7frnix8&^-{=oDF zXZ;AFvz2CLCLIA99j&ZXGh(UeiEB4a%DF%~U%bJ0)lJKlyIGPAJvNmqaz5Q@JV}uB zndZ|)%OM)ZCH$49E?kkFF4Z9YkRdf6o<2)Zj)oIq=W6Zp~qUgg^3aRul*Ex!rD^o8QRy;4X=>S zc08!iNTX+TWO9VGsTo_WI+&r)dZ!_EeL(1kTrpB1qx3c!rJ`Xe-p~KBT9nvHYwQ+9 i=J3$xvb}?_QXiH+wOdK*O}Q}M>lQr2Y>J<6?E5h-KC^f+=*){7`Y&7<&$rT4@vdl53E$< zS}3%;CTs zp4%3Bj3|D3=J(ce{bB<6MOXadr_b=$yc@$ywn)`Y^29vV`sV!htBt`SO7B8seS$`RJ}dpC9QC6jWHERtX2 z8c{sk#5WGsy-R!^jrhib#3zS3ln7BawF;>4$`R}xj|daXUO5W4s4y|hC@g>PcPuuy zwc#PkKJosr5oH|8#vlcql1@sxy!Yi^ty6v%;Hfogl56=Kx74cI?p|4>3Dt#!cXZPLn-#F ziv|lvLngvGMfGg45Ys!gTNR_B=(A0;*v zsgRF}94x6&rEWRbt$}T=`v;gd6h?{;Td<);P@s)mH?i9eSQ@llv22)9B$>RXh~_P+ zmN?eF@9z7^_a5KNf&lB5g=drB-S7Kzci;Va_l|e^WMXI_7zhXsLGil+Q*$jb38`-( z<~agrO!SI2_`XHFSF}L7Nn-FNY0N>F$u!ee$rU056!m(83g`{SWQ6I>3KmScgh-o- zr$~%aibmR1lBp1y1W*sly0k_nfgY6+rfDk23{lp@B%*@ z2$i2HRabQ7vd4CA@5-e+bJ=`(x^udBXXnoC(NZD0P1YOjqwKx|qcSIKZ+0z~`Fi*u zhWzV)J$$ZwD)v`jUA)qH-`Bb_fBpw2I#3pylG{y2$OPiME}!JC0jc_M4M18wEbayD zS`Xax!B6?%pZ39T2V8@XI~V|X<@Zt|zHwn8j)9F;K~8`zgPEZT4$L&#gQ z;r8*gY{O}2N>l7toRiumqQ2IXzgIz$`#o|r8;<>-!)6<9pWnJ|INIXSYs0H;3e1=d z2S-$cZo_eGaQOF*TVB&=UuxA)Hb2*<3H{UaR&eEMef9^f=cUNXj$Z=2vgvvF?${WE z7^WwXcWGe-!lr+~G`jZE(=z=Grm@PG=4JXvn8vDJdQ7Ijk7;!Ir8$}Y4yLinmrl#{ zUt=0we`yBNSiYWL4_1D1k6!tuKKuOANOJJP{8C#?=oik-NOIxCDk#^w4WRcQr&0cV zCfc#_5wIY{Y5mmh`+%XJYW@V0UCY)wsOpQYs;X@(3mqG0Q1CoakdI6G9d{zRE{;r=z*v7pIo)B2Lso*9_bB@4t%^RjMbXFO ziq4NKdT|Bl_tvkDi6=I|*JA1F%8Fh|Ea|fkEs64Gs6z_tP&xM<4FxgRC%RGa6Cn5m zCKpE_b|G;EqT*L~pG;iQ&*&%npPT_Jix(2h67D^jSgyef`pLwC{$>3)4=w7I(O>8% z_rqs&Q9oHm&XQiau=svZn@ubSW*=H^d-(gai7ODh()RG5G5>3CtgM{=;d$kdmo&Zd z(6U}BgDRM*><2TWOCSo^&(Ye)!J0}w^#snai<6+PvaHJVx@88H3>Jy);M0ih;OJi5 z;2IS+Uq!6JGof#4mSjnZMi-S%VYK<&JyKWjZ!rgmg2y>~hE9 zb4P3)6x%=kvBApU$L|`fyc!>kSKf#xhfZxuHfds1Kh=r!(9j*wJ6>vC9KHgzJNHJ& z+Sv0f_1B@wYeSV+`ztTUSFZm@efE4n-}O)Bf5q|p`$ys*j~|KO6F2OAPL2;K2U0oi zX{6(PY7Y;H$wJyPOV<77m{lm2n76l3&ZYCWTbi7~HJgN%$>z+}VpDg|&RlMq<&n;> z{BumDm-Ym13VjgQE^Y96?DdtEGeEncT2nxe0>xo_1Nd+r=FM z2qwboKOahjwNp)r@aE4o>*4MbEqb_jcGbRcEFbQThr8q9&3$35FTB1l9O( z)n(W~beZ0_aQt9k{wr5k3Yb0(hBv!AwBG2BXt%blds|QUj-Kt> z=7VNh(^HnhJA1nCKzVFKoRhH=?u*|R_`p<8_qJU^{2ke3x)EhCD7ofhIiJtw$F=dX zF;_-HxMfrMeBoF&?g~pvkJ#Rb&i+ixm9*!m#xBN)-p0hsI@UwfcG*J-ziCdQbaZOf}^jIz8}GNchEmU?cl~|^ZiYDapHe;5^Dn&jFx?hWA^=>KL}1+&^`fd4crt_fQOe>w=j!#eaq9J7pgR_p$y5_SIznckP9}rF}J2 z{B7>5`K%*l`MFJBpf-Ig>qxiGo7rc(+-Kg3eYcKZf8_l@rj7pi_V#&y+x@Z8o^R>B z;~g`U)M$J}j#1eH$YLtKU`+--qDsh0llE#NmAPT5-`+{@Ut! z4X6Bdg3oC!uefQp+em4W2fwc|A`O&Z6EwuAN+M6d?T)wJnHo!AN&a6?)pAL zIKOvueZK*?S3mg{$@4oA%m0vYo?jV%j&QgC|I-IwhYv50dTsN;5BcB)AN&hG_@h4f zlZ3nL`#jUc!C~-R0lc=5e)%XxFGTR zdHX)H^QG39)R1RgAvO(oMBFHnu6h2GQr=}8lEa7x>z{Xj+0=_Mr;2;60WU{6|J6VK}6|rpOMXHt>~CLZJ2Ol$t4q= zC|GU`<-G7{IiJhkYvzu7D)=#@^0HNV%2UaQ7nPT;Tt$_|axy^O%8nUDGglb1*Vl+R$@>*?gx`@zZg@P|`C8mRY+Q>flQPQEk(P|>pc&?)2cmy;|(i>c#K zO{;iZsDriX^5o=kP_d9jEV?>-RUKe*S) zQGPQFID>5@bh4oL9~8#Eq2a#xkTEZvQTPE15g$0x1SF`!ma(Fu;pliKjK$iorxXuNN*(@Kp~YCK<#9xG?z zl6Dp*5=>3t0rY74cpmI2Y84gdezRD@^XF2G5(e<#S&>2oicRI<@hANnwajUV%MKGQ z7G!sbniJHuC(@wHDW#$MxYFP_7*0)Q;mJFWk5%}J$_$h-Xde-U(KZPqNR>-gCYrLW zV)j@W?w;`?;O$equL4W8>g_k)uj0Q;!1q-+2b8sk_d^2jh~R4<2Hw}=d;gk5YhS!> zWTXcoc75LevdG|VEJ)O?-w*h!aHoXxKNE~e$ull}UT0RpluShf7~!5E|j z?5jy4da1&&{E~iReZKE<7vS=hQHfXg-IfuA@J&Uk-){fi101{~!{z7wxvx`2`Th`> zpXHd|3;KBHhSR){H&6N+_$@2>wNq~Aw!yf1i|6dtwd z<8P*J{m%o2KEd+I&=Mo62;ZktDOFaEBdx&4uCI~4Mh0h4$QI<80Z23bV~E@J=SY8! z^hc>3*#zrx`YSGd-nU$)i&1M>kZ6-DJqKybpZ(AKo2_)wYr-YZ{$m*ZYuD#}Q2zTF zNfspP&i~skecms +#include +#include + +/* Constants */ + +#define ENTRIES 10 +#define RANDOM_VECS 500 + +/* Prototypes */ + +void output(FILE *fptr, long a, long b, long rem); +void printhex(FILE *fptr, long x); +double random_input(void); + +/* Main */ + +void main(void) +{ + FILE *fptr; + long a, b, rem; + long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255}; + int i, j; + + if ((fptr = fopen("modtestvectors","w")) == NULL) { + fprintf(stderr, "Couldn't write testvectors file\n"); + exit(1); + } + + for (i=0; iS@dfGW*6;)pR$3B;vl1Fb|V94`HlJm(ND)e8+rKp=J)G)=H#`G zo$YJs~8&&+{Z~M0Ng7L0i#%Qq~qfYzlY>gQ41$-}chVq=T zq|&7q(!bnZkC(0&@FDL6RSvFy(8XB#MqXbPE8b4py~X3OOK#$W@QmC!IP>3()tGTy zU#%K{W3~JEhcj2NB#fzB#e?=fI_AZy60TQLhH^9R#6{1(0h!4S(+T8No zKbDmfz5D9aw_JU4wd+gC>tk#)hCUZ@xT-eubLNWbl_3=f!<|NhN6np0(?Nd}uSb*^ zEOqz!=VV8%X5*vZ@-+tG&mEmor89_AV61af9bOeOe?fFl#2=&n9k8_b0Vwc#^YuNuIgcWPZiV$?9oks=4%yl0;bs9=R!o<+&CTav zdtZpR!v4NxY`4Ti4VBGQX7DO(xnvfEr{Xx$9Ep25-wiM8X`X=CJ%!ea2~t>#*{4*V;Wt gp(WAd@1DC%W^pcCgdNmOX4+bBa z^Pvh2ZG|+3GoKplgIZ0)A5A3P2OCMaBQ?D?P1D0PrP`QY+BCU}B*;roq+P$+{lGfO z&g}1-Z)SF9Cp&ze51;4pv?uCiR*8P{Yr>mUY$TPSb(4fie!FDvM_+cGY5Z<$-^8uW zFD?FZbn4yJo5q;qkgFhA9AtWGE)9>}3j=ptjIAi%lR?uoR}|H?K-S6E$(`gBIYLg5 zqvRjSN60^r`^n?v_igvNzOn<_pjiD3VOZcZLR=tD7!x>6_*&o;VM5?IVOroA;etR6 zW<7y{*X_{B>#V;Rhqz0ULjYK{~%YB?~-?+Q(Df;^!|#O8 zi;nVhAW7wX9BQQsJ_QG)n(lMrkc^}*h!d4vTD5jd%jl=I97eO^pxO7A5^su(8O#uY zpAc+z>dC>=)TNJHN)(FRQTRrx+c(1*>rooCOzhs`Q}}_j3|4ZzL+sa~IXse9K+9l` z%ZJg}?Uc4AW?$^Fva?;F$QAq`?3C*cPCvQeZ-1NSkhbO=`e3c`!%yQ^E{64y``Li? z4z)?K3ZqqelR;-?Kw9EO9tmH0WY%7Mo$4i1qbsL{C zuBdJqmdkan8_!gXmU?vD4pBN`zOmhtmThT9`QEF1gH7;PQ=PQl*@Q!2ux?8+ZVh8X zqG(pmhD1*P(>ILn<`elIZ6jyCam)V&)hz)}C-sM6zI8KXc!_bW<$2C^WNDtlBG~$P z5_quG>1BS^a0GYe@igpi_i+slv@hU&RmfMizPR-~SaCey%ptQTcqN3HMsLW?c{jYL z#c;9*S_2P4S;xzK33@wPak0scL|K%cWPOV5`HXp)c!cBoeYngQ`$x-eNI|RnIwi$0 yXA_gzw?>;jF7dbF!D|g0`>I1#xY-k-X(MnL_21w3Y`D45uiDe$d)*Ch&-)idgXL=g diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index 76c6a6649..7a45449fd 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -86,10 +86,6 @@ void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac) // Print r in standard double format fprintf(fptr, "%03x", rExp); printhex(fptr, rFrac); - fprintf(fptr, "_"); - - // Spacing for testbench, value doesn't matter - fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); } diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do index 1e0c3f281..2fbf40c18 100644 --- a/pipelined/srt/srt-waves.do +++ b/pipelined/srt/srt-waves.do @@ -2,4 +2,5 @@ add wave -noupdate /testbench/* add wave -noupdate /testbench/srt/* add wave -noupdate /testbench/srt/sotfc2/* add wave -noupdate /testbench/srt/preproc/* +add wave -noupdate /testbench/srt/postproc/* add wave -noupdate /testbench/srt/divcounter/* diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 7cbcd0daa..1b61bc14f 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -47,9 +47,10 @@ module srt ( // Selection input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs + input logic Mod, // perform remainder calculation (modulo) instead of divide input logic Sqrt, // perform square root, not divide output logic rsign, done, - output logic [`DIVLEN-1:0] Rem, Result, + output logic [`DIVLEN-1:0] Result, output logic [`NE-1:0] rExp, output logic [3:0] Flags ); @@ -63,7 +64,7 @@ module srt ( logic intSign; logic cin; - srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign); + srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Mod, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -105,7 +106,7 @@ module srt ( expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); - srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Result, .Rem, .calcSign); + srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Mod, .Result, .calcSign); endmodule //////////////// @@ -123,6 +124,7 @@ module srtpreproc ( input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs + input logic Mod, // perform remainder calculation (modulo) instead of divide input logic Sqrt, // perform square root, not divide output logic [`DIVLEN+3:0] X, D, output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent @@ -161,7 +163,7 @@ module srtpreproc ( assign D = {4'b0001, Int ? PreprocB : PreprocY}; // Integer exponent and sign calculations - assign intExp = zeroCntB - zeroCntA + 1; + assign intExp = zeroCntB - zeroCntA - Mod + (PreprocA >= PreprocB); assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // Number of cycles of divider @@ -398,8 +400,8 @@ endmodule module srtpostproc( input logic [`DIVLEN+3:0] WS, WC, X, D, S, SM, input logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD, - input logic XSign, YSign, Signed, Int, - output logic [`DIVLEN-1:0] Result, Rem, + input logic XSign, YSign, Signed, Int, Mod, + output logic [`DIVLEN-1:0] Result, output logic calcSign ); logic [`DIVLEN+3:0] W, shiftRem, intRem, intS; @@ -460,9 +462,11 @@ module srtpostproc( end assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0]; assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0]; - assign Result = Int ? intRes : floatRes; + assign shiftRem = (intRem >>> (`DIVLEN - dur + 2)); + always_comb + if (Int) Result = intRes >> (`DIVLEN - dur); + else if (Mod) Result = shiftRem[`DIVLEN-1:0]; + else Result = floatRes; assign calcSign = XSign ^ YSign; - assign shiftRem = intRem >>> dur; - assign Rem = shiftRem[`DIVLEN-1:0]; endmodule diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index b72ffb42e..513305b26 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -42,24 +42,23 @@ module testbench; logic clk; logic req; logic done; - logic Int; + logic Int, Sqrt, Mod; logic [`XLEN-1:0] a, b; logic [`NF-1:0] afrac, bfrac; logic [`NE-1:0] aExp, bExp; logic asign, bsign; logic [`NF-1:0] r; logic [`XLEN-1:0] rInt; - logic [`DIVLEN-1:0] Quot, Rem; + logic [`DIVLEN-1:0] Quot; // Test parameters parameter MEM_SIZE = 40000; parameter MEM_WIDTH = 64+64+64+64; // Test sizes - `define memrem 63:0 - `define memr 127:64 - `define memb 191:128 - `define mema 255:192 + `define memr 63:0 + `define memb 127:64 + `define mema 191:128 // Test logicisters logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file @@ -70,9 +69,10 @@ module testbench; logic rsign; integer testnum, errors; - // Equip Int test or Sqrt test - assign Int = 1'b0; - assign Sqrt = 1'b1; + // Equip Int, Sqrt, or IntMod test + assign Int = 1'b1; + assign Mod = 1'b0; + assign Sqrt = 1'b0; // Divider srt srt(.clk, .Start(req), @@ -81,8 +81,8 @@ module testbench; .XSign(asign), .YSign(bsign), .rsign, .SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcA(a), .SrcB(b), .Fmt(2'b00), - .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, - .Result(Quot), .Rem, .Flags(), .done); + .W64(1'b1), .Signed(1'b0), .Int, .Mod, .Sqrt, + .Result(Quot), .Flags(), .done); // Counter // counter counter(clk, req, done); @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("sqrttestvectors", Tests); + $readmemh ("inttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; @@ -109,7 +109,7 @@ module testbench; {bsign, bExp, bfrac} = b; nextr = Vec[`memr]; r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; - rInt = {1'b1, Quot}; + rInt = Quot; req <= #5 1; end @@ -119,7 +119,7 @@ module testbench; r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; rInt = Quot; if (done) begin - if (~Int & ~Sqrt) begin + if (~Int & ~Sqrt) begin // This test case checks floating point division req <= #5 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; @@ -135,23 +135,21 @@ module testbench; $display("%d Tests completed successfully", testnum); $stop; end - end else if (~Sqrt) begin + end else if (~Sqrt) begin // This test case works for both integer divide and integer modulo req <= #5 1; diffp = correctr[63:0] - rInt; - diffn = rInt - correctr[63:0]; - if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + if (($signed(diffp) != 0) | (diffp === 64'bx)) // check if accurate to 1 ulp begin errors = errors+1; - $display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp); + $display("result was %h, should be %h %h\n", rInt, correctr, diffp); $display("failed\n"); - $stop; end if (afrac === 52'hxxxxxxxxxxxxx) begin - $display("%d Tests completed successfully", testnum); + $display("%d Tests completed successfully", testnum - errors); $stop; end - end else begin + end else begin // This test case verifies square root req <= #5 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; From 3d2c6683d8d4331134ea8f54a4b318812e38e97f Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 21 Jul 2022 20:35:46 -0700 Subject: [PATCH 076/138] Fixed UART bug related to parity and MSR/LSR --- pipelined/src/uncore/uartPC16550D.sv | 50 ++++++++++++++++------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index 524a63454..89bdc837d 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -165,6 +165,7 @@ module uartPC16550D( SCR <= #1 8'b0; // not strictly necessary to reset end else begin if (~MEMWb) begin + /* verilator lint_off CASEINCOMPLETE */ case (A) /* -----\/----- EXCLUDED -----\/----- 3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section @@ -177,34 +178,40 @@ module uartPC16550D( // freq /baud / 16 = div //3'b000: if (DLAB) DLL <= #1 8'd38; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section //3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in - 3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in + 3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in 3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0]; - 3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing 3'b011: LCR <= #1 Din; 3'b100: MCR <= #1 Din[4:0]; - 3'b101: LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3 - 3'b110: MSR <= #1 Din[3:0]; 3'b111: SCR <= #1 Din; endcase + /* verilator lint_on CASEINCOMPLETE */ end - + // Line Status Register (8.6.3) - // Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay. - LSR[0] <= #1 rxdataready; // Data ready - LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error - LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error - LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error - LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator - LSR[5] <= #1 THRE; // THRE - LSR[6] <= #1 ~txsrfull & THRE; // TEMT - if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error + // Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay. + if (~MEMWb & (A == 3'b101)) + LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3 + else begin + LSR[0] <= #1 rxdataready; // Data ready + LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error + LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error + LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error + LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator + LSR[5] <= #1 THRE; // THRE + LSR[6] <= #1 ~txsrfull & THRE; // TEMT + if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error + end // Modem Status Register (8.6.8) - MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send - MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready - MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator - MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect + if (~MEMWb & (A == 3'b110)) + MSR <= #1 Din[3:0]; + else begin + MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send + MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready + MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator + MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect + end end always_comb if (~MEMRb) @@ -215,7 +222,8 @@ module uartPC16550D( 3'b011: Dout = LCR; 3'b100: Dout = {3'b000, MCR}; 3'b101: Dout = LSR; - 3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]}; + // 3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]}; + 3'b110: Dout = {~DCDbsync, ~RIbsync, ~DSRbsync, ~CTSbsync, MSR[3:0]}; 3'b111: Dout = SCR; endcase else Dout = 8'b0; @@ -304,7 +312,7 @@ module uartPC16550D( // ERROR CONDITIONS assign rxparity = ^rxdata; - assign rxparityerr = rxparity ^ rxparitybit ^ ~evenparitysel; // Check even/odd parity (*** check if LCR needs to be inverted) + assign rxparityerr = (rxparity ^ rxparitybit ^ ~evenparitysel) & LCR[3]; // Check even/odd parity (*** check if LCR needs to be inverted) assign rxoverrunerr = fifoenabled ? (rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full assign rxframingerr = ~rxstopbit; // framing error if no stop bit assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time @@ -405,7 +413,7 @@ module uartPC16550D( txstate <= #1 UART_IDLE; end - assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) + assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - 1 // *** explain; is this necessary? if (`QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE else assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE From 8dcb794bbb713e144ef420c3cf008408cfc6d47e Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 20:58:58 -0700 Subject: [PATCH 077/138] added support for new version of riscof and arch tests, now supports tests that can be compiled for both rv32 and rv64 --- pipelined/testbench/tests.vh | 346 ++++++++++----------- tests/riscof/Makefile | 5 +- tests/riscof/sail_cSim/riscof_sail_cSim.py | 2 +- tests/riscof/spike/riscof_spike.py | 2 +- 4 files changed, 178 insertions(+), 177 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index df06eb010..b10bb951b 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1057,176 +1057,176 @@ string imperas32f[] = '{ string arch64d[] = '{ `RISCVARCHTEST, - "rv64i_m/D/src/d_fadd_b10-01.S", - "rv64i_m/D/src/d_fadd_b1-01.S", - "rv64i_m/D/src/d_fadd_b11-01.S", - "rv64i_m/D/src/d_fadd_b12-01.S", - "rv64i_m/D/src/d_fadd_b13-01.S", - "rv64i_m/D/src/d_fadd_b2-01.S", - "rv64i_m/D/src/d_fadd_b3-01.S", - "rv64i_m/D/src/d_fadd_b4-01.S", - "rv64i_m/D/src/d_fadd_b5-01.S", - "rv64i_m/D/src/d_fadd_b7-01.S", - "rv64i_m/D/src/d_fadd_b8-01.S", - "rv64i_m/D/src/d_fclass_b1-01.S", - "rv64i_m/D/src/d_fcvt.d.l_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.l_b26-01.S", - "rv64i_m/D/src/d_fcvt.d.lu_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.lu_b26-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b1-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b22-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b23-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b24-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b27-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b28-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b29-01.S", - "rv64i_m/D/src/d_fcvt.d.w_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.w_b26-01.S", - "rv64i_m/D/src/d_fcvt.d.wu_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.wu_b26-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S", - "rv64i_m/D/src/d_fdiv_b1-01.S", - "rv64i_m/D/src/d_fdiv_b20-01.S", - "rv64i_m/D/src/d_fdiv_b2-01.S", - "rv64i_m/D/src/d_fdiv_b21-01.S", - "rv64i_m/D/src/d_fdiv_b3-01.S", - "rv64i_m/D/src/d_fdiv_b4-01.S", - "rv64i_m/D/src/d_fdiv_b5-01.S", - "rv64i_m/D/src/d_fdiv_b6-01.S", - "rv64i_m/D/src/d_fdiv_b7-01.S", - "rv64i_m/D/src/d_fdiv_b8-01.S", - "rv64i_m/D/src/d_fdiv_b9-01.S", - "rv64i_m/D/src/d_feq_b1-01.S", - "rv64i_m/D/src/d_feq_b19-01.S", - "rv64i_m/D/src/d_fle_b1-01.S", - "rv64i_m/D/src/d_fle_b19-01.S", - "rv64i_m/D/src/d_flt_b1-01.S", - "rv64i_m/D/src/d_flt_b19-01.S", - "rv64i_m/D/src/d_fld-align-01.S", - "rv64i_m/D/src/d_fsd-align-01.S", - "rv64i_m/D/src/d_fmadd_b14-01.S", - "rv64i_m/D/src/d_fmadd_b16-01.S", - "rv64i_m/D/src/d_fmadd_b17-01.S", - "rv64i_m/D/src/d_fmadd_b18-01.S", - "rv64i_m/D/src/d_fmadd_b2-01.S", - "rv64i_m/D/src/d_fmadd_b3-01.S", - "rv64i_m/D/src/d_fmadd_b4-01.S", - "rv64i_m/D/src/d_fmadd_b5-01.S", - "rv64i_m/D/src/d_fmadd_b6-01.S", - "rv64i_m/D/src/d_fmadd_b7-01.S", - "rv64i_m/D/src/d_fmadd_b8-01.S", - "rv64i_m/D/src/d_fmax_b1-01.S", - "rv64i_m/D/src/d_fmax_b19-01.S", - "rv64i_m/D/src/d_fmin_b1-01.S", - "rv64i_m/D/src/d_fmin_b19-01.S", - "rv64i_m/D/src/d_fmsub_b14-01.S", - "rv64i_m/D/src/d_fmsub_b16-01.S", - "rv64i_m/D/src/d_fmsub_b17-01.S", - "rv64i_m/D/src/d_fmsub_b18-01.S", - "rv64i_m/D/src/d_fmsub_b2-01.S", - "rv64i_m/D/src/d_fmsub_b3-01.S", - "rv64i_m/D/src/d_fmsub_b4-01.S", - "rv64i_m/D/src/d_fmsub_b5-01.S", - "rv64i_m/D/src/d_fmsub_b6-01.S", - "rv64i_m/D/src/d_fmsub_b7-01.S", - "rv64i_m/D/src/d_fmsub_b8-01.S", - "rv64i_m/D/src/d_fmul_b1-01.S", - "rv64i_m/D/src/d_fmul_b2-01.S", - "rv64i_m/D/src/d_fmul_b3-01.S", - "rv64i_m/D/src/d_fmul_b4-01.S", - "rv64i_m/D/src/d_fmul_b5-01.S", - "rv64i_m/D/src/d_fmul_b6-01.S", - "rv64i_m/D/src/d_fmul_b7-01.S", - "rv64i_m/D/src/d_fmul_b8-01.S", - "rv64i_m/D/src/d_fmul_b9-01.S", - "rv64i_m/D/src/d_fmv.d.x_b25-01.S", - "rv64i_m/D/src/d_fmv.d.x_b26-01.S", - "rv64i_m/D/src/d_fmv.x.d_b1-01.S", - "rv64i_m/D/src/d_fmv.x.d_b22-01.S", - "rv64i_m/D/src/d_fmv.x.d_b23-01.S", - "rv64i_m/D/src/d_fmv.x.d_b24-01.S", - "rv64i_m/D/src/d_fmv.x.d_b27-01.S", - "rv64i_m/D/src/d_fmv.x.d_b28-01.S", - "rv64i_m/D/src/d_fmv.x.d_b29-01.S", - "rv64i_m/D/src/d_fnmadd_b14-01.S", - "rv64i_m/D/src/d_fnmadd_b16-01.S", - "rv64i_m/D/src/d_fnmadd_b17-01.S", - "rv64i_m/D/src/d_fnmadd_b18-01.S", - "rv64i_m/D/src/d_fnmadd_b2-01.S", - "rv64i_m/D/src/d_fnmadd_b3-01.S", - "rv64i_m/D/src/d_fnmadd_b4-01.S", - "rv64i_m/D/src/d_fnmadd_b5-01.S", - "rv64i_m/D/src/d_fnmadd_b6-01.S", - "rv64i_m/D/src/d_fnmadd_b7-01.S", - "rv64i_m/D/src/d_fnmadd_b8-01.S", - "rv64i_m/D/src/d_fnmsub_b14-01.S", - "rv64i_m/D/src/d_fnmsub_b16-01.S", - "rv64i_m/D/src/d_fnmsub_b17-01.S", - "rv64i_m/D/src/d_fnmsub_b18-01.S", - "rv64i_m/D/src/d_fnmsub_b2-01.S", - "rv64i_m/D/src/d_fnmsub_b3-01.S", - "rv64i_m/D/src/d_fnmsub_b4-01.S", - "rv64i_m/D/src/d_fnmsub_b5-01.S", - "rv64i_m/D/src/d_fnmsub_b6-01.S", - "rv64i_m/D/src/d_fnmsub_b7-01.S", - "rv64i_m/D/src/d_fnmsub_b8-01.S", - "rv64i_m/D/src/d_fsgnj_b1-01.S", - "rv64i_m/D/src/d_fsgnjn_b1-01.S", - "rv64i_m/D/src/d_fsgnjx_b1-01.S", - // "rv64i_m/D/src/d_fsqrt_b1-01.S", - // "rv64i_m/D/src/d_fsqrt_b20-01.S", - // "rv64i_m/D/src/d_fsqrt_b2-01.S", - // "rv64i_m/D/src/d_fsqrt_b3-01.S", - // "rv64i_m/D/src/d_fsqrt_b4-01.S", - // "rv64i_m/D/src/d_fsqrt_b5-01.S", - // "rv64i_m/D/src/d_fsqrt_b7-01.S", - // "rv64i_m/D/src/d_fsqrt_b8-01.S", - // "rv64i_m/D/src/d_fsqrt_b9-01.S", - "rv64i_m/D/src/d_fsub_b10-01.S", - "rv64i_m/D/src/d_fsub_b1-01.S", - "rv64i_m/D/src/d_fsub_b11-01.S", - "rv64i_m/D/src/d_fsub_b12-01.S", - "rv64i_m/D/src/d_fsub_b13-01.S", - "rv64i_m/D/src/d_fsub_b2-01.S", - "rv64i_m/D/src/d_fsub_b3-01.S", - "rv64i_m/D/src/d_fsub_b4-01.S", - "rv64i_m/D/src/d_fsub_b5-01.S", - "rv64i_m/D/src/d_fsub_b7-01.S", - "rv64i_m/D/src/d_fsub_b8-01.S" + "rv64i_m/D/src/fadd.d_b10-01.S", + "rv64i_m/D/src/fadd.d_b1-01.S", + "rv64i_m/D/src/fadd.d_b11-01.S", + "rv64i_m/D/src/fadd.d_b12-01.S", + "rv64i_m/D/src/fadd.d_b13-01.S", + "rv64i_m/D/src/fadd.d_b2-01.S", + "rv64i_m/D/src/fadd.d_b3-01.S", + "rv64i_m/D/src/fadd.d_b4-01.S", + "rv64i_m/D/src/fadd.d_b5-01.S", + "rv64i_m/D/src/fadd.d_b7-01.S", + "rv64i_m/D/src/fadd.d_b8-01.S", + "rv64i_m/D/src/fclass.d_b1-01.S", + "rv64i_m/D/src/fcvt.d.l_b25-01.S", + "rv64i_m/D/src/fcvt.d.l_b26-01.S", + "rv64i_m/D/src/fcvt.d.lu_b25-01.S", + "rv64i_m/D/src/fcvt.d.lu_b26-01.S", + "rv64i_m/D/src/fcvt.d.s_b1-01.S", + "rv64i_m/D/src/fcvt.d.s_b22-01.S", + "rv64i_m/D/src/fcvt.d.s_b23-01.S", + "rv64i_m/D/src/fcvt.d.s_b24-01.S", + "rv64i_m/D/src/fcvt.d.s_b27-01.S", + "rv64i_m/D/src/fcvt.d.s_b28-01.S", + "rv64i_m/D/src/fcvt.d.s_b29-01.S", + "rv64i_m/D/src/fcvt.d.w_b25-01.S", + "rv64i_m/D/src/fcvt.d.w_b26-01.S", + "rv64i_m/D/src/fcvt.d.wu_b25-01.S", + "rv64i_m/D/src/fcvt.d.wu_b26-01.S", + "rv64i_m/D/src/fcvt.l.d_b1-01.S", + "rv64i_m/D/src/fcvt.l.d_b22-01.S", + "rv64i_m/D/src/fcvt.l.d_b23-01.S", + "rv64i_m/D/src/fcvt.l.d_b24-01.S", + "rv64i_m/D/src/fcvt.l.d_b27-01.S", + "rv64i_m/D/src/fcvt.l.d_b28-01.S", + "rv64i_m/D/src/fcvt.l.d_b29-01.S", + "rv64i_m/D/src/fcvt.lu.d_b1-01.S", + "rv64i_m/D/src/fcvt.lu.d_b22-01.S", + "rv64i_m/D/src/fcvt.lu.d_b23-01.S", + "rv64i_m/D/src/fcvt.lu.d_b24-01.S", + "rv64i_m/D/src/fcvt.lu.d_b27-01.S", + "rv64i_m/D/src/fcvt.lu.d_b28-01.S", + "rv64i_m/D/src/fcvt.lu.d_b29-01.S", + "rv64i_m/D/src/fcvt.s.d_b1-01.S", + "rv64i_m/D/src/fcvt.s.d_b22-01.S", + "rv64i_m/D/src/fcvt.s.d_b23-01.S", + "rv64i_m/D/src/fcvt.s.d_b24-01.S", + "rv64i_m/D/src/fcvt.s.d_b27-01.S", + "rv64i_m/D/src/fcvt.s.d_b28-01.S", + "rv64i_m/D/src/fcvt.s.d_b29-01.S", + "rv64i_m/D/src/fcvt.w.d_b1-01.S", + "rv64i_m/D/src/fcvt.w.d_b22-01.S", + "rv64i_m/D/src/fcvt.w.d_b23-01.S", + "rv64i_m/D/src/fcvt.w.d_b24-01.S", + "rv64i_m/D/src/fcvt.w.d_b27-01.S", + "rv64i_m/D/src/fcvt.w.d_b28-01.S", + "rv64i_m/D/src/fcvt.w.d_b29-01.S", + "rv64i_m/D/src/fcvt.wu.d_b1-01.S", + "rv64i_m/D/src/fcvt.wu.d_b22-01.S", + "rv64i_m/D/src/fcvt.wu.d_b23-01.S", + "rv64i_m/D/src/fcvt.wu.d_b24-01.S", + "rv64i_m/D/src/fcvt.wu.d_b27-01.S", + "rv64i_m/D/src/fcvt.wu.d_b28-01.S", + "rv64i_m/D/src/fcvt.wu.d_b29-01.S", + "rv64i_m/D/src/fdiv.d_b1-01.S", + "rv64i_m/D/src/fdiv.d_b20-01.S", + "rv64i_m/D/src/fdiv.d_b2-01.S", + "rv64i_m/D/src/fdiv.d_b21-01.S", + "rv64i_m/D/src/fdiv.d_b3-01.S", + "rv64i_m/D/src/fdiv.d_b4-01.S", + "rv64i_m/D/src/fdiv.d_b5-01.S", + "rv64i_m/D/src/fdiv.d_b6-01.S", + "rv64i_m/D/src/fdiv.d_b7-01.S", + "rv64i_m/D/src/fdiv.d_b8-01.S", + "rv64i_m/D/src/fdiv.d_b9-01.S", + "rv64i_m/D/src/feq.d_b1-01.S", + "rv64i_m/D/src/feq.d_b19-01.S", + "rv64i_m/D/src/fle.d_b1-01.S", + "rv64i_m/D/src/fle.d_b19-01.S", + "rv64i_m/D/src/flt.d_b1-01.S", + "rv64i_m/D/src/flt.d_b19-01.S", + // "rv64i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back + // "rv64i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + "rv64i_m/D/src/fmadd.d_b14-01.S", + "rv64i_m/D/src/fmadd.d_b16-01.S", + "rv64i_m/D/src/fmadd.d_b17-01.S", + "rv64i_m/D/src/fmadd.d_b18-01.S", + "rv64i_m/D/src/fmadd.d_b2-01.S", + "rv64i_m/D/src/fmadd.d_b3-01.S", + "rv64i_m/D/src/fmadd.d_b4-01.S", + "rv64i_m/D/src/fmadd.d_b5-01.S", + "rv64i_m/D/src/fmadd.d_b6-01.S", + "rv64i_m/D/src/fmadd.d_b7-01.S", + "rv64i_m/D/src/fmadd.d_b8-01.S", + "rv64i_m/D/src/fmax.d_b1-01.S", + "rv64i_m/D/src/fmax.d_b19-01.S", + "rv64i_m/D/src/fmin.d_b1-01.S", + "rv64i_m/D/src/fmin.d_b19-01.S", + "rv64i_m/D/src/fmsub.d_b14-01.S", + "rv64i_m/D/src/fmsub.d_b16-01.S", + "rv64i_m/D/src/fmsub.d_b17-01.S", + "rv64i_m/D/src/fmsub.d_b18-01.S", + "rv64i_m/D/src/fmsub.d_b2-01.S", + "rv64i_m/D/src/fmsub.d_b3-01.S", + "rv64i_m/D/src/fmsub.d_b4-01.S", + "rv64i_m/D/src/fmsub.d_b5-01.S", + "rv64i_m/D/src/fmsub.d_b6-01.S", + "rv64i_m/D/src/fmsub.d_b7-01.S", + "rv64i_m/D/src/fmsub.d_b8-01.S", + "rv64i_m/D/src/fmul.d_b1-01.S", + "rv64i_m/D/src/fmul.d_b2-01.S", + "rv64i_m/D/src/fmul.d_b3-01.S", + "rv64i_m/D/src/fmul.d_b4-01.S", + "rv64i_m/D/src/fmul.d_b5-01.S", + "rv64i_m/D/src/fmul.d_b6-01.S", + "rv64i_m/D/src/fmul.d_b7-01.S", + "rv64i_m/D/src/fmul.d_b8-01.S", + "rv64i_m/D/src/fmul.d_b9-01.S", + "rv64i_m/D/src/fmv.d.x_b25-01.S", + "rv64i_m/D/src/fmv.d.x_b26-01.S", + "rv64i_m/D/src/fmv.x.d_b1-01.S", + "rv64i_m/D/src/fmv.x.d_b22-01.S", + "rv64i_m/D/src/fmv.x.d_b23-01.S", + "rv64i_m/D/src/fmv.x.d_b24-01.S", + "rv64i_m/D/src/fmv.x.d_b27-01.S", + "rv64i_m/D/src/fmv.x.d_b28-01.S", + "rv64i_m/D/src/fmv.x.d_b29-01.S", + "rv64i_m/D/src/fnmadd.d_b14-01.S", + "rv64i_m/D/src/fnmadd.d_b16-01.S", + "rv64i_m/D/src/fnmadd.d_b17-01.S", + "rv64i_m/D/src/fnmadd.d_b18-01.S", + "rv64i_m/D/src/fnmadd.d_b2-01.S", + "rv64i_m/D/src/fnmadd.d_b3-01.S", + "rv64i_m/D/src/fnmadd.d_b4-01.S", + "rv64i_m/D/src/fnmadd.d_b5-01.S", + "rv64i_m/D/src/fnmadd.d_b6-01.S", + "rv64i_m/D/src/fnmadd.d_b7-01.S", + "rv64i_m/D/src/fnmadd.d_b8-01.S", + "rv64i_m/D/src/fnmsub.d_b14-01.S", + "rv64i_m/D/src/fnmsub.d_b16-01.S", + "rv64i_m/D/src/fnmsub.d_b17-01.S", + "rv64i_m/D/src/fnmsub.d_b18-01.S", + "rv64i_m/D/src/fnmsub.d_b2-01.S", + "rv64i_m/D/src/fnmsub.d_b3-01.S", + "rv64i_m/D/src/fnmsub.d_b4-01.S", + "rv64i_m/D/src/fnmsub.d_b5-01.S", + "rv64i_m/D/src/fnmsub.d_b6-01.S", + "rv64i_m/D/src/fnmsub.d_b7-01.S", + "rv64i_m/D/src/fnmsub.d_b8-01.S", + "rv64i_m/D/src/fsgnj.d_b1-01.S", + "rv64i_m/D/src/fsgnjn.d_b1-01.S", + "rv64i_m/D/src/fsgnjx.d_b1-01.S", + // "rv64i_m/D/src/fsqrt.d_b1-01.S", + // "rv64i_m/D/src/fsqrt.d_b20-01.S", + // "rv64i_m/D/src/fsqrt.d_b2-01.S", + // "rv64i_m/D/src/fsqrt.d_b3-01.S", + // "rv64i_m/D/src/fsqrt.d_b4-01.S", + // "rv64i_m/D/src/fsqrt.d_b5-01.S", + // "rv64i_m/D/src/fsqrt.d_b7-01.S", + // "rv64i_m/D/src/fsqrt.d_b8-01.S", + // "rv64i_m/D/src/fsqrt.d_b9-01.S", + "rv64i_m/D/src/fssub.d_b10-01.S", + "rv64i_m/D/src/fssub.d_b1-01.S", + "rv64i_m/D/src/fssub.d_b11-01.S", + "rv64i_m/D/src/fssub.d_b12-01.S", + "rv64i_m/D/src/fssub.d_b13-01.S", + "rv64i_m/D/src/fssub.d_b2-01.S", + "rv64i_m/D/src/fssub.d_b3-01.S", + "rv64i_m/D/src/fssub.d_b4-01.S", + "rv64i_m/D/src/fssub.d_b5-01.S", + "rv64i_m/D/src/fssub.d_b7-01.S", + "rv64i_m/D/src/fssub.d_b8-01.S" }; string arch32priv[] = '{ @@ -1310,7 +1310,7 @@ string imperas32f[] = '{ "rv32i_m/F/src/fle_b19-01.S", "rv32i_m/F/src/flt_b1-01.S", "rv32i_m/F/src/flt_b19-01.S", - "rv32i_m/F/src/flw-align-01.S", + // "rv32i_m/F/src/flw-align-01.S", "rv32i_m/F/src/fmadd_b1-01.S", "rv32i_m/F/src/fmadd_b14-01.S", // "rv32i_m/F/src/fmadd_b15-01.S", @@ -1407,8 +1407,8 @@ string imperas32f[] = '{ "rv32i_m/F/src/fsub_b4-01.S", "rv32i_m/F/src/fsub_b5-01.S", "rv32i_m/F/src/fsub_b7-01.S", - "rv32i_m/F/src/fsub_b8-01.S", - "rv32i_m/F/src/fsw-align-01.S" + "rv32i_m/F/src/fsub_b8-01.S" + // "rv32i_m/F/src/fsw-align-01.S" }; diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index a9a442d38..fffa0e454 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -26,10 +26,11 @@ fsd_fld_tempfix: build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser rm -rf $(arch_workdir)/rv$(XLEN)i_m - mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/ + rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" + rsync -a $(work_dir)/rv64i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" build_wally: - riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run + riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt rm -rf $(wally_workdir)/rv$(XLEN)i_m mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/ diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py index dc3033ab3..683b816b3 100644 --- a/tests/riscof/sail_cSim/riscof_sail_cSim.py +++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py @@ -94,7 +94,7 @@ class sail_cSim(pluginTemplate): execute = "@cd "+testentry['work_dir']+";" - cmd = self.compile_cmd.format(testentry['isa'].lower(), self.xlen) + ' ' + test + ' -o ' + elf + cmd = self.compile_cmd.format(testentry['isa'].lower().replace('zicsr', ' ', 1), self.xlen) + ' ' + test + ' -o ' + elf compile_cmd = cmd + ' -D' + " -D".join(testentry['macros']) execute+=compile_cmd+";" diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index fd4293954..4f74c72f4 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -151,7 +151,7 @@ class spike(pluginTemplate): # substitute all variables in the compile command that we created in the initialize # function - cmd = self.compile_cmd.format(testentry['isa'].lower(), self.xlen, test, elf, compile_macros) + cmd = self.compile_cmd.format(testentry['isa'].lower().replace('zicsr', ' ', 2), self.xlen, test, elf, compile_macros) # if the user wants to disable running the tests and only compile the tests, then # the "else" clause is executed below assigning the sim command to simple no action From c29a60c198cda7d861e34945a719cac3b7bac6bf Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 21:10:15 -0700 Subject: [PATCH 078/138] changed gitignore, updated version of arch tests on main build --- .gitignore | 4 ---- addins/riscv-arch-test | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 9c6691eb3..0c8a1655a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,12 +7,8 @@ __pycache__/ .vscode/ #External repos -addins addins/riscv-arch-test/Makefile.include addins/riscv-tests/target -addins/coremark/work/* -addins/embench/bd_speed/* -addins/embench/bd_size/* benchmarks/embench/wally*.json #vsim work files to ignore diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index be67c99bd..e5020bf7b 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 +Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545 From d22587090bfcab76ef7572ddfc5eb0a1dc8bd105 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 22 Jul 2022 04:29:27 +0000 Subject: [PATCH 079/138] Reset MSR on read --- pipelined/src/uncore/uartPC16550D.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index 89bdc837d..5eabe60c7 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -206,6 +206,8 @@ module uartPC16550D( // Modem Status Register (8.6.8) if (~MEMWb & (A == 3'b110)) MSR <= #1 Din[3:0]; + else if (~MEMRb & (A == 3'b110)) + MSR <= #1 4'b0; // Reading MSR clears the flags in MSR bits 3:0 else begin MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready From df568fd202bf53be651e13183810dd3153546e1f Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 07:10:39 -0700 Subject: [PATCH 080/138] Added PLIC and UART tests and new functions to the test library --- addins/riscv-arch-test | 2 +- pipelined/testbench/tests.vh | 6 +- .../rv32i_m/privilege/Makefrag | 2 + .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 81 +++++++++++++++++-- 4 files changed, 81 insertions(+), 10 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index e5020bf7b..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index b10bb951b..a2145835d 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1603,9 +1603,9 @@ string wally32i[] = '{ string wally32periph[] = '{ `WALLYTEST, - "rv32i_m/privilege/WALLY-gpio-01", - "rv32i_m/privilege/WALLY-clint-01" - // "rv32i_m/privilege/WALLY-plic-01" + // "rv32i_m/privilege/WALLY-gpio-01", + // "rv32i_m/privilege/WALLY-clint-01" + "rv32i_m/privilege/WALLY-plic-01" // "rv32i_m/privilege/WALLY-uart-01" }; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag index 56b3bc01f..23806cf67 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag @@ -55,6 +55,8 @@ target_tests_nosim = \ WALLY-status-tw-01 \ WALLY-gpio-01 \ WALLY-clint-01 \ + WALLY-plic-01 \ + WALLY-uart-01 \ rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests)) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 7e112c917..17ff15c02 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -954,6 +954,17 @@ read08_test: // address to read in t3, expected 8 bit value in t4 (unused, but there for your perusal). li t2, 0xBAD // bad value that will be overwritten on good reads. lb t2, 0(t3) + andi t2, t2, 0xFF // mask to lower 8 bits + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + +read04_test: + // address to read in t3, expected 8 bit value in t4 (unused, but there for your perusal). + li t2, 0xBAD // bad value that will be overwritten on good reads. + lb t2, 0(t3) + andi t2, t2, 15 // mask lower 4 bits sw t2, 0(t1) addi t1, t1, 4 addi a6, a6, 4 @@ -974,6 +985,18 @@ readsip_test: // read the MIP into the signature j test_loop // go to next test case claim_m_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C00000C // GPIO priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 + li t2, 0x0C000028 // UART priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 li t2, 0x0C002000 li t3, 0x0C200004 li t4, 0xFFF @@ -982,9 +1005,28 @@ claim_m_plic_interrupts: // clears one non-pending PLIC interrupt lw t5, 0(t3) // make PLIC claim sw t5, 0(t3) // complete claim made sw t6, 0(t2) // restore saved enable status + li t2, 0x0C00000C // GPIO priority + li t3, 0x0C000028 // UART priority + lw t4, 4(sp) // load stored GPIO and UART priority + lw t5, 0(sp) + addi sp, sp, 8 // restore stack pointer + sw t4, 0(t2) + sw t5, 0(t3) j test_loop claim_s_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C00000C // GPIO priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 + li t2, 0x0C000028 // UART priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 li t2, 0x0C002080 li t3, 0x0C201004 li t4, 0xFFF @@ -993,25 +1035,52 @@ claim_s_plic_interrupts: // clears one non-pending PLIC interrupt lw t5, 0(t3) // make PLIC claim sw t5, 0(t3) // complete claim made sw t6, 0(t2) // restore saved enable status + li t2, 0x0C00000C // GPIO priority + li t3, 0x0C000028 // UART priority + lw t4, 4(sp) // load stored GPIO and UART priority + lw t5, 0(sp) + addi sp, sp, 8 // restore stack pointer + sw t4, 0(t2) + sw t5, 0(t3) + j test_loop + +uart_lsr_intr_wait: // waits for interrupts to be ready + li t2, 0x10000002 // IIR + li t4, 0x6 +uart_lsr_intr_loop: + lb t3, 0(t2) + andi t3, t3, 0x7 + bne t3, t4, uart_lsr_intr_loop + sw t3, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 j test_loop uart_data_wait: li t2, 0x10000005 // LSR li t3, 0x10000002 // IIR + li a4, 0x61 +uart_read_LSR_IIR: lb t4, 0(t3) // save IIR before potential clear lb t5, 0(t2) - andi t5, t5, 1 // only care if data is ready - li t6, 1 - beq t5, t6, uart_data_ready - j uart_data_wait + andi t6, t5, 0x61 // only care if all transmissions are done + bne a4, t6, uart_read_LSR_IIR uart_data_ready: - sb t4, 0(t1) - sb t5, 1(t1) + li t2, 0 + sw t2, 0(t1) // clear entry deadbeef from memory + andi t5, t5, 0x9F // mask THRE and TEMT from signature + sb t4, 1(t1) // IIR + sb t5, 0(t1) // LSR addi t1, t1, 4 addi a6, a6, 4 j test_loop +uart_clearmodemintr: + li t2, 0x10000006 + lb t2, 0(t2) + j test_loop + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior (go to supervisor mode) From d38369e8bf3543276555736b9d20d8b0514c8970 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 07:12:55 -0700 Subject: [PATCH 081/138] Added new PLIC and UART tests --- .../references/WALLY-plic-01.reference_output | 249 +++++ .../WALLY-plic-s-01.reference_output | 0 .../references/WALLY-uart-01.reference_output | 33 + .../rv32i_m/privilege/src/WALLY-plic-01.S | 951 ++++++++++++++++++ .../rv32i_m/privilege/src/WALLY-plic-s-01.S | 493 +++++++++ .../rv32i_m/privilege/src/WALLY-uart-01.S | 140 +++ 6 files changed, 1866 insertions(+) create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output new file mode 100644 index 000000000..8c6b8ef6a --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output @@ -0,0 +1,249 @@ +00000000 # read empty MIP (1.0.0) +00000008 # check GPIO interrupt pending on intPending0 +00000000 # Claim gives no interrupt due to zero priority +00000008 # interrupt still pending due to no claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.0.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000000 # read empty MIP (1.0.2) +00000000 # check GPIO interrupt pending on intPending0 +00000000 # claim gives no interrupt due to no intPending +00000000 # no interrupts pending after clear +00000000 # still no interrupts after clear +00000000 # read empty MIP (1.1.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # interrupt still pending due to no claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.1.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.2.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.2.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.3.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.3.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.4.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.4.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.5.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.5.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.6.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.6.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.7.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set (2.0) +00000408 # gpio and uart pending +00000003 # claim gpio +00000400 # gpio no longer pending +00000000 # no interrupts pending +00000800 # MEIP set (2.1) +00000408 # gpio and uart pending +00000003 # claim gpio +00000400 # gpio no longer pending +00000000 # no interrupts pending +00000800 # MEIP set (2.2) +00000408 # gpio and uart pending +0000000A # claim uart +00000008 # uart no longer pending +00000000 # no interrupts pending +00000800 # MEIP set (2.3) +00000408 # gpio and uart pending +0000000A # claim uart +00000008 # uart no longer pending +00000000 # no interrupts pending +00000000 # MEIP empty (2.4) +00000408 # gpio and uart pending +0000000A # claim none +00000008 # gpio and uart still pending +00000000 # no interrupts pending +00000A00 # MEIP and SEIP set (3.0) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # SEIP set (3.1) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # SEIP set (3.2) +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # GPIO interrupt pending after UART claimcomp +00000000 # check no interrupts pending +00000000 # read empty MIP (3.3) +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # check UART interrupt pending cleared after claim +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (4.0) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000200 # SEIP set (4.1) +00000400 # UART interrupt pending +00000000 # nothing in claim register +00000400 # UART interrupt pending +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (4.2) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (4.3) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (4.4) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000200 # SEIP set (4.5) +00000400 # UART interrupt pending +00000000 # nothing in claim register +00000400 # UART interrupt pending +00000000 # check no interrupts pending +00000000 # All disabled (4.6) +00000400 # UART interrupt pending +00000000 # nothing in claim register +00000400 # UART interrupt pending +00000000 # check no interrupts pending +00000200 # SEIP set (5.0) +00000008 # GPIO interrupt pending +00000000 # nothing in claim register +00000008 # GPIO interrupt pending +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (5.1) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (5.2) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (5.3) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (5.4) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000200 # SEIP set (5.5) +00000008 # GPIO interrupt pending +00000000 # nothing in claim register +00000008 # GPIO interrupt pending +00000000 # check no interrupts pending +00000000 # read empty MIP (5.6) +00000008 # GPIO interrupt pending +00000000 # nothing in claim register +00000008 # GPIO interrupt pending +00000000 # check no interrupts pending +0000000b # written due to goto_s_mode +00000200 # read sip (7.0) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # read sip (7.1) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # read sip (7.2) +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # GPIO interrupt pending after UART claimcomp +00000000 # check no interrupts pending +00000200 # read sip (7.3) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000000 # read sip +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # nothing in claim register +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # check no interrupts pending +00000200 # read sip +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000000 # read sip +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # nothing in claim register +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # check no interrupts pending +00000009 # output from ecall in supervisor mode +00000800 # MEIP set +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # GPIO interrupt pending after UART claimcomp +00000003 # claim gives 3 for ID of GPIO +00000000 # read empty MIP # no interrupts, meip is low +00000000 # check no interrupts pending +00000800 # MEIP set +00000008 # GPIO interrupt pending after complete +00000003 # claim gives 3 for ID of GPIO +00000000 # read empty MIP # meip is zeroed +00000000 # check no interrupts pending +00000800 # MEIP set +00000400 # check GPIO interrupt pending cleared after claim +00000800 # MEIP set +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output new file mode 100644 index 000000000..e69de29bb diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output new file mode 100644 index 000000000..457bc35c8 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -0,0 +1,33 @@ +00000000 # Reset tests +00000001 # 00000000 *** commented because LCR should reset to zero but resets to 3 due to the FPGA +00000000 +00000060 +00000000 +00000060 # read-write test +00000020 # transmitter register empty but shift register not +00000101 # transmitter is not empty when done transmitting 5 bits +00000000 +00000060 +00000101 # Multi-bit transmission: 5 bits +00000015 +00000101 # Transmit 6 bits +0000002A +00000101 # Transmit 7 bits +0000007F +00000101 # Transmit 8 bits +00000080 +00000002 # Transmission interrupt tests +00000401 # Interrupt generated by finished transmission +00000004 +00000006 # IIR return LSR intr and LSR has an overflow error +00000063 +00000004 +00000001 +00000001 # MODEM interrupt tests +00000000 +00000011 +00000001 + + + +0000000b # ecall from test termination diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S new file mode 100644 index 000000000..81a48a23f --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S @@ -0,0 +1,951 @@ +/////////////////////////////////////////// +// +// WALLY-plic +// +// Author: David_Harris@hmc.edu and Nicholas Lucio +// +// Created 2022-06-16 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +#include "WALLY-TEST-LIB-32.h" + +INIT_TESTS + +TRAP_HANDLER m + +j run_test_loop // begin test loop/table tests instead of executing inline code. + +INIT_TEST_TABLE + +END_TESTS + +TEST_STACK_AND_DATA + +.align 2 +test_cases: +# --------------------------------------------------------------------------------------------- +# Test Contents +# +# Here is where the actual tests are held, or rather, what the actual tests do. +# each entry consists of 3 values that will be read in as follows: +# +# '.4byte [x28 Value], [x29 Value], [x30 value]' +# or +# '.4byte [address], [value], [test type]' +# +# The encoding for x30 test type values can be found in the test handler in the framework file +# +# --------------------------------------------------------------------------------------------- + +# =========== Define PLIC registers =========== + +.equ PLIC, 0x0C000000 +.equ PLIC_INTPRI_GPIO, (PLIC+0x00000C) # GPIO is interrupt 3 +.equ PLIC_INTPRI_UART, (PLIC+0x000028) # UART is interrupt 10 +.equ PLIC_INTPENDING0, (PLIC+0x001000) # intPending0 register +.equ PLIC_INTEN00, (PLIC+0x002000) # interrupt enables for context 0 (machine mode) sources 31:1 +.equ PLIC_INTEN10, (PLIC+0x002080) # interrupt enables for context 1 (supervisor mode) sources 31:1 +.equ PLIC_THRESH0, (PLIC+0x200000) # Priority threshold for context 0 (machine mode) +.equ PLIC_CLAIM0, (PLIC+0x200004) # Claim/Complete register for context 0 +.equ PLIC_THRESH1, (PLIC+0x201000) # Priority threshold for context 1 (supervisor mode) +.equ PLIC_CLAIM1, (PLIC+0x201004) # Claim/Complete register for context 1 + +# =========== Define GPIO registers =========== + +.equ GPIO, 0x10060000 +.equ input_val, (GPIO+0x00) +.equ input_en, (GPIO+0x04) +.equ output_en, (GPIO+0x08) +.equ output_val, (GPIO+0x0C) +.equ rise_ie, (GPIO+0x18) +.equ rise_ip, (GPIO+0x1C) +.equ fall_ie, (GPIO+0x20) +.equ fall_ip, (GPIO+0x24) +.equ high_ie, (GPIO+0x28) +.equ high_ip, (GPIO+0x2C) +.equ low_ie, (GPIO+0x30) +.equ low_ip, (GPIO+0x34) +.equ iof_en, (GPIO+0x38) +.equ iof_sel, (GPIO+0x3C) +.equ out_xor, (GPIO+0x40) + +# =========== Define UART registers =========== + +.equ UART, 0x10000000 +.equ UART_IER, (UART+0x01) +.equ UART_MCR, (UART+0x04) +.equ UART_MSR, (UART+0x06) + +# =========== Initialize UART and GPIO =========== + +# GPIO Initialization +.4byte input_en, 0x00000001, write32_test # enable bit 0 of input_en +.4byte output_en, 0x00000001, write32_test # enable bit 0 of output_en +.4byte output_val, 0x00000000, write32_test # make sure output_val is 0 +.4byte rise_ie, 0x00000001, write32_test # enable rise interrupts + +# =========== Initialize relevant PLIC registers =========== + +.4byte PLIC_INTPRI_GPIO, 0x00000000, write32_test # set GPIO priority to zero +.4byte PLIC_INTPRI_UART, 0x00000000, write32_test # set UART priority to zero +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max + +# =========== Machine-Mode Priority Testing (1.T.X) =========== + +# Test 1.0.0: GPIO int lacks priority (0 = 0) +.4byte PLIC_THRESH0, 0x00000000, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.0.1: GPIO int has priority (1 > 0) +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.0.2: meip and c/c clear without interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.1.0: GPIO lacks priority (1 = 1) +.4byte PLIC_THRESH0, 0x00000001, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.1.1: GPIO int has priority (2 > 1) +.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.2.0: GPIO int lacks priority (2 = 2) +.4byte PLIC_THRESH0, 0x00000002, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.2.1: GPIO int has priority (3 > 2) +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.3.0: GPIO int lacks priority (3 = 3) +.4byte PLIC_THRESH0, 0x00000003, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.3.1: GPIO int has priority (4 > 3) +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.4.0: GPIO int lacks priority (4 = 4) +.4byte PLIC_THRESH0, 0x00000004, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.4.1: GPIO int has priority (5 > 4) +.4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.5.0: GPIO int lacks priority (5 = 5) +.4byte PLIC_THRESH0, 0x00000005, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.5.1: GPIO int has priority (6 > 5) +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.6.0: GPIO int lacks priority (6 = 6) +.4byte PLIC_THRESH0, 0x00000006, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.6.1: GPIO int has priority (7 > 6) +.4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.7.0: GPIO int lacks priority (7 = 7) +.4byte PLIC_THRESH0, 0x00000007, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# =========== UART vs GPIO priority (2.X) =========== + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max +# UART Initialization +.4byte UART_IER, 0x08, write08_test # enable modem status interrupts from CTS +.4byte UART_MCR, 0x10, write08_test # enable loopback mode, RTS = 0 +.4byte UART_MSR, 0x00, write08_test # disable UART interrupt + +# Test 2.0: GPIO Priority = UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIOPriority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UARTPriority = 1 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.1: GPIO Priority > UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # GPIOPriority = 3 +.4byte PLIC_INTPRI_UART, 0x00000002, write32_test # UARTPriority = 2 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.2: GPIO Priority < UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # GPIOPriority = 4 +.4byte PLIC_INTPRI_UART, 0x00000005, write32_test # UARTPriority = 5 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.3: GPIO Priority < UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIOPriority = 6 +.4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UARTPriority = 7 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.4: Interrupts don't have enough priority + +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # GPIOPriority = 4 +.4byte PLIC_INTPRI_UART, 0x00000005, write32_test # UARTPriority = 5 +.4byte PLIC_THRESH0, 0x00000006, write32_test # set m-mode threshold to 6 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== SEIP tests (3.X) =========== + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 + +# Test 3.0: Cause machine and supervisor interrupts + +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIOPriority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UARTPriority = 1 +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 3.1: Suppress machine mode interrupts + +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # GPIOPriority = 3 +.4byte PLIC_INTPRI_UART, 0x00000002, write32_test # UARTPriority = 2 +.4byte PLIC_THRESH0, 0x00000007, write32_test # set m-mode threshold to 7 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 3.2: Cause SEIP with UART first + +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIOPriority = 6 +.4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UARTPriority = 7 +.4byte PLIC_THRESH0, 0x00000007, write32_test # set m-mode threshold to 7 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 3.3: Low SEIP due to insufficient priority + +.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # GPIOPriority = 2 +.4byte PLIC_INTPRI_UART, 0x00000003, write32_test # UARTPriority = 3 +.4byte PLIC_THRESH0, 0x00000004, write32_test # set m-mode threshold to 4 +.4byte PLIC_THRESH1, 0x00000005, write32_test # set s-mode threshold to 5 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== UART interrupt enable tests (4.X) =========== + +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# Test 4.0: GPIO m-mode disabled + +.4byte PLIC_INTEN00, 0x00000400, write32_test # disable GPIO m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.1: UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000008, write32_test # disable UART m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.2: GPIO s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000400, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.3: UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000008, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.4: GPIO and UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.5: GPIO and UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.6: GPIO and UART fully disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== GPIO interrupt enable tests (5.X) =========== + +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# Test 5.0: GPIO m-mode disabled + +.4byte PLIC_INTEN00, 0x00000400, write32_test # disable GPIO m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.1: UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000008, write32_test # disable UART m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.2: GPIO s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000400, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.3: UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000008, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.4: GPIO and UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.5: GPIO and UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.6: GPIO and UART fully disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== S-mode enable tests (7.X) =========== + +.4byte 0x0, 0x0, goto_s_mode # go to s-mode. 0xb written to output +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# Test 7.0: GPIO m-mode disabled + +.4byte PLIC_INTEN00, 0x00000400, write32_test # disable GPIO m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.1: UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000008, write32_test # disable UART m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.2: GPIO s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000400, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.3: UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000008, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.4: GPIO and UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.5: GPIO and UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.6: GPIO and UART fully disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== Special claim tests (8) =========== + +.4byte 0x0, 0x0, goto_m_mode # write 0x9 to output + +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIO Priority = 6 +.4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte PLIC_THRESH0, 0x00000005, write32_test # set m-mode threshold to 5 + +# Test 8 + +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # read interrupt pending +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # claim UART +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt cleared +.4byte PLIC_CLAIM0, 0x00000003, read32_test # claim GPIO +.4byte 0x0, 0x00000000, readmip_test # no interrupts, meip is low +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # both interrupts claimed +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete GPIO +.4byte 0x0, 0x00000800, readmip_test # GPIO interrupt sets MEIP +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # GPIO bit is set +.4byte PLIC_CLAIM0, 0x00000003, read32_test # claim GPIO again +.4byte 0x0, 0x00000000, readmip_test # meip is zeroed +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # both interrupts claimed +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete UART claim +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # UART pending +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete GPIO claim +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # GPIO and UART pending +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # claim UART + +.4byte 0x0, 0x0, terminate_test # terminate tests diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S new file mode 100644 index 000000000..45b87c4b1 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S @@ -0,0 +1,493 @@ +/////////////////////////////////////////// +// +// WALLY-gpio +// +// Author: David_Harris@hmc.edu and Nicholas Lucio +// +// Created 2022-06-16 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +#include "WALLY-TEST-LIB-32.h" + +INIT_TESTS + +TRAP_HANDLER m + +j run_test_loop // begin test loop/table tests instead of executing inline code. + +INIT_TEST_TABLE + +END_TESTS + +TEST_STACK_AND_DATA + +.align 2 +test_cases: +# --------------------------------------------------------------------------------------------- +# Test Contents +# +# Here is where the actual tests are held, or rather, what the actual tests do. +# each entry consists of 3 values that will be read in as follows: +# +# '.4byte [x28 Value], [x29 Value], [x30 value]' +# or +# '.4byte [address], [value], [test type]' +# +# The encoding for x30 test type values can be found in the test handler in the framework file +# +# --------------------------------------------------------------------------------------------- + +# =========== Define PLIC registers =========== + +.equ PLIC, 0x0C000000 +.equ PLIC_INTPRI_GPIO, (PLIC+0x00000C) # GPIO is interrupt 3 +.equ PLIC_INTPRI_UART, (PLIC+0x000028) # UART is interrupt 10 +.equ PLIC_INTPENDING0, (PLIC+0x001000) # intPending0 register +.equ PLIC_INTEN00, (PLIC+0x002000) # interrupt enables for context 0 (machine mode) sources 31:1 +.equ PLIC_INTEN10, (PLIC+0x002080) # interrupt enables for context 1 (supervisor mode) sources 31:1 +.equ PLIC_THRESH0, (PLIC+0x200000) # Priority threshold for context 0 (machine mode) +.equ PLIC_CLAIM0, (PLIC+0x200004) # Claim/Complete register for context 0 +.equ PLIC_THRESH1, (PLIC+0x201000) # Priority threshold for context 1 (supervisor mode) +.equ PLIC_CLAIM1, (PLIC+0x201004) # Claim/Complete register for context 1 + +# =========== Define GPIO registers =========== + +.equ GPIO, 0x10060000 +.equ input_val, (GPIO+0x00) +.equ input_en, (GPIO+0x04) +.equ output_en, (GPIO+0x08) +.equ output_val, (GPIO+0x0C) +.equ rise_ie, (GPIO+0x18) +.equ rise_ip, (GPIO+0x1C) +.equ fall_ie, (GPIO+0x20) +.equ fall_ip, (GPIO+0x24) +.equ high_ie, (GPIO+0x28) +.equ high_ip, (GPIO+0x2C) +.equ low_ie, (GPIO+0x30) +.equ low_ip, (GPIO+0x34) +.equ iof_en, (GPIO+0x38) +.equ iof_sel, (GPIO+0x3C) +.equ out_xor, (GPIO+0x40) + +# =========== Define UART registers =========== + +.equ UART, 0x10000000 +.equ UART_IER, (UART+0x01) +.equ UART_MCR, (UART+0x04) +.equ UART_MSR, (UART+0x06) + +# =========== Initialize UART and GPIO =========== + +# GPIO Initialization +.4byte input_en, 0x00000001, write32_test # enable bit 0 of input_en +.4byte output_en, 0x00000001, write32_test # enable bit 0 of output_en +.4byte output_val, 0x00000000, write32_test # make sure output_val is 0 +.4byte rise_ie, 0x00000001, write32_test # enable rise interrupts + +# UART Initialization +.4byte UART_IER, 0x08, write08_test # enable modem status interrupts from CTS +.4byte UART_MCR, 0x10, write08_test # enable loopback mode, RTS = 0 +.4byte UART_MSR, 0x00, write08_test # disable UART interrupt + +# =========== Initialize relevant PLIC registers =========== + +.4byte PLIC_INTPRI_GPIO, 0x00000000, write32_test # set GPIO priority to zero +.4byte PLIC_INTPRI_UART, 0x00000000, write32_test # set UART priority to zero +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max + +# =========== Machine-Mode Priority Testing (1.T.X) =========== + +# Test 1.0.0: GPIO int lacks priority (0 = 0) +.4byte PLIC_THRESH0, 0x00000000, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect no interrupt pending *** pending bug????? +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.0.1: GPIO int has priority (1 > 0) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.0.2: meip and c/c clear without interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete + +# Test 1.1.0: GPIO lacks priority (1 = 1) +.4byte PLIC_THRESH0, 0x00000001, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.1.1: GPIO int has priority (2 > 1) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.2.0: GPIO int lacks priority (2 = 2) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000002, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.2.1: GPIO int has priority (3 > 2) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.3.0: GPIO int lacks priority (3 = 3) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000003, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.3.1: GPIO int has priority (4 > 3) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.4.0: GPIO int lacks priority (4 = 4) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000004, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.4.1: GPIO int has priority (5 > 4) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.5.0: GPIO int lacks priority (5 = 5) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000005, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.5.1: GPIO int has priority (6 > 5) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.6.0: GPIO int lacks priority (6 = 6) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000006, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.6.1: GPIO int has priority (7 > 6) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.7.0: GPIO int lacks priority (7 = 7) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000007, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# # =========== UART vs GPIO priority (2.X) =========== *** + +# .4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +# .4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +# .4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +# .4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max + +# # Test 2.0: GPIO Priority = UART Priority + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIOPriority = 1 +# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UARTPriority = 1 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # UART interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.1: GPIO Priority < UART Priority + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # GPIO Priority = 2 +# .4byte PLIC_INTPRI_UART, 0x00000003, write32_test # UART Priority = 3 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for UART and GPIO +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # GPIO interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.2: GPIO Priority > UART Priority + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test # GPIO Priority = 5 +# .4byte PLIC_INTPRI_UART, 0x00000004, write32_test # UART Priority = 4 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # UART interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.3: GPIO Priority < UART Priority (2) + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIO Priority = 6 +# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for UART and GPIO +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # GPIO interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.4: Interrupts disabled (Thresh0 = 7) + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_THRESH0, 0x00000007, write32_test # Disable m-mode interrupts +# .4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test # GPIO Priority = 7 +# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +# .4byte 0x0, 0x00000000, readmip_test, # read mip +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending +# .4byte 0x0, 0x00000000, readmclaimcomplete_test # no interrupt pending +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # =========== SEIP tests (3.X) =========== + +# .4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +# .4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +# .4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +# .4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 + +# # Test 3.0: Cause machine and supervisor interrupts + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 +# .4byte 0x0, 0x00000A00, readmip_test # Expect high on MEIP and SEIP +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000A00, readmip_test # Still expect high on MEIP and SEIP +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 3.1: Suppress machine mode interrupts + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # GPIO Priority = 3 +# .4byte PLIC_INTPRI_UART, 0x00000002, write32_test # UART Priority = 2 +# .4byte PLIC_THRESH0, 0x00000007, write32_test # set m-mode threshold to 7 +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 3.2: Cause SEIP with UART first + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIO Priority = 6 +# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt was cleared +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 3.3: Low SEIP due to insufficient priority + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # GPIO Priority = 2 +# .4byte PLIC_INTPRI_UART, 0x00000003, write32_test # UART Priority = 3 +# .4byte PLIC_THRESH0, 0x00000004, write32_test # set m-mode threshold to 7 +# .4byte PLIC_THRESH1, 0x00000005, write32_test # set s-mode threshold to 7 +# .4byte 0x0, 0x00000000, readmip_test # no interrupt pending +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending +# .4byte 0x0, 0x00000000, readmclaimcomplete_test # Expect nothing on claim/complete +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # =========== UART interrupt enable tests (4.X) =========== + +# .4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +# .4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# # Test 4.0: GPIO m-mode disabled + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTEN00, 0x000000200, write32_test # GPIO m-mode interrupt disabled +# .4byte PLIC_INTEN00, 0x000000208, write32_test # No s-mode interrupt disabled +# .4byte 0x0, 0x00000A00, readmip_test # Expect high on MEIP from UART and SEIP from GPIO +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000200, readmip_test # Expect high on MEIP and SEIP from UART +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear + +# Test 4.1: UART m-mode disabled + + + +# Test 4.2: GPIO s-mode disabled + + + +# Test 4.3: UART s-mode disabled + + + +# Test 4.4: GPIO and UART s-mode disabled + + + +# Test 4.5: GPIO and UART m-mode disabled + + + +# Test 4.6: GPIO and UART fully disabled + +.4byte 0x0, 0x0, terminate_test # terminate tests diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S new file mode 100644 index 000000000..08b1dc25e --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -0,0 +1,140 @@ +/////////////////////////////////////////// +// +// WALLY-uart +// +// Author: David_Harris@hmc.edu and Nicholas Lucio +// +// Created 2022-06-16 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +#include "WALLY-TEST-LIB-32.h" + +INIT_TESTS + +TRAP_HANDLER m + +j run_test_loop // begin test loop/table tests instead of executing inline code. + +INIT_TEST_TABLE + +END_TESTS + +TEST_STACK_AND_DATA + +.align 2 + +.equ UART, 0x10000000 +.equ UART_RBR, (UART) +.equ UART_THR, (UART) +.equ UART_IER, (UART+0x01) +.equ UART_IIR, (UART+0x02) +.equ UART_FCR, (UART+0x02) +.equ UART_LCR, (UART+0x03) +.equ UART_MCR, (UART+0x04) +.equ UART_LSR, (UART+0x05) +.equ UART_MSR, (UART+0x06) +.equ UART_Scr, (UART+0x07) + +test_cases: +# --------------------------------------------------------------------------------------------- +# Test Contents +# +# Here is where the actual tests are held, or rather, what the actual tests do. +# each entry consists of 3 values that will be read in as follows: +# +# '.4byte [x28 Value], [x29 Value], [x30 value]' +# or +# '.4byte [address], [value], [test type]' +# +# The encoding for x30 test type values can be found in the test handler in the framework file +# +# --------------------------------------------------------------------------------------------- + +# =========== UART resets to correct values on master reset =========== + +.4byte UART_IER, 0x00, read08_test +.4byte UART_IIR, 0x01, read08_test # IIR resets to 1 +# .4byte UART_LCR, 0x00, read08_test *** commented out because LCR should reset to zero but resets to 3 +.4byte UART_MCR, 0x00, read08_test +.4byte UART_LSR, 0x60, read08_test # LSR resets with transmit status bits set +.4byte UART_MSR, 0x00, read04_test + +# =========== Basic read-write =========== + +.4byte UART_LCR, 0x00, write08_test # set LCR to reset value *** remove if UART resets to correct value +.4byte UART_MCR, 0x10, write08_test # put UART into loopback for MSR test +.4byte UART_LSR, 0x60, read08_test +.4byte UART_THR, 0x00, write08_test # write value to UART +.4byte UART_LSR, 0x00, read08_test # data not ready and transmitter is not empty +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and LSR +.4byte UART_RBR, 0x00, read08_test # read written value +.4byte UART_LSR, 0x60, read08_test # read LSR + +# =========== Different size read-write =========== + +# Transmit 5 bits + +.4byte UART_LCR, 0x00, write08_test # set LCR to transmit 5 bits +.4byte UART_THR, 0x55, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x15, read08_test # read written value without bits 5-7 + +# Transmit 6 bits + +.4byte UART_LCR, 0x01, write08_test # set LCR to transmit six bits +.4byte UART_THR, 0xAA, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x2A, read08_test # read written value without bits 6 & 7 + +# Transmit 7 bits + +.4byte UART_LCR, 0x02, write08_test # set LCR to transmit seven bits +.4byte UART_THR, 0xFF, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x7F, read08_test # read written value without bit 7 + +# Transmit 8 bits + +.4byte UART_LCR, 0x03, write08_test # set LCR to transmit seven bits +.4byte UART_THR, 0x80, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x80, read08_test # read full written value + sign extension + +# =========== Transmit-related interrupts =========== + +.4byte UART_IER, 0x07, write08_test # enable data available, buffer empty, and line status interrupts +.4byte UART_IIR, 0x02, read08_test # buffer should be empty, causing interrupt +.4byte UART_THR, 0x00, write08_test # write zeroes to transmitter +.4byte 0x0, 0x0401, uart_data_wait # IIR should have data ready interrupt +.4byte UART_THR, 0x01, write08_test # write 1 to transmitter buffer +.4byte UART_IIR, 0x04, read08_test # data interrupt should still be high +.4byte 0x0, 0x06, uart_lsr_intr_wait # wait for transmission to complete, IIR should throw error due to overrun error. +.4byte UART_LSR, 0x63, read08_test # read overrun error from LSR +.4byte UART_IIR, 0x04, read08_test # check that LSR interrupt was cleared +.4byte UART_RBR, 0x01, read08_test # read previous value from UART + +# =========== MODEM interrupts =========== + +.4byte UART_MSR, 0x00, write08_test # clear MSR +.4byte UART_IER, 0x08, write08_test # enable MODEM Status interrupts +.4byte UART_IIR, 0x01, read08_test # no interrupts pending +.4byte UART_MCR, 0x02, write08_test # Cause DCTS interrupt +.4byte UART_IIR, 0x00, read08_test # MODEM interrupt +.4byte UART_MSR, 0x11, read08_test # Read MSR to clear interrupt +.4byte UART_IIR, 0x01, read08_test # interrupt cleared by reading MSR + +.4byte 0x0, 0x0, terminate_test \ No newline at end of file From e2691c02b75b37815c62a4cc971206302e1290d8 Mon Sep 17 00:00:00 2001 From: cturek Date: Fri, 22 Jul 2022 16:45:19 +0000 Subject: [PATCH 082/138] Square root negative exponent handling --- pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/sqrttestgen.c | 33 ++++++++++++++++++++++++--------- pipelined/srt/srt-waves.do | 1 + pipelined/srt/srt.sv | 22 ++++++++++++---------- pipelined/srt/testbench.sv | 8 ++++---- 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen index 45fc6e7868a87d7e5f3b9308e340b3600a392273..7c6efb9f9085d76783d4192e98ba9cd8c44b8d24 100755 GIT binary patch delta 1737 zcmY+FYfKzf6vyw~T^q z&i&tW&OLjR9lk_|FVSGs+36p|kV!+QwHtdatqe_tAad?tOwB zX?aW+a%MJbA=|;UOmB{tD3PfsyPYu`2o@6(7p1g?G;ZBPUeh!=Uqx1sXOY{HBgh@d zVdPHakC6`}UqtRg{xHRXp`++HopQ*1Jo_nxVGi#g1UUo|LL5dA&Tu%6Fvg)DVVpw` z!UTtI1l=$Gp~nMo<*o(Ov;fveVa-+hQ>vjpo5$hj!ck+YC5BioRp$hYwPJaQa4hWsnCfxMXF4MQv7h{y@kPRQh8cp2dr z+4Zc_Kp!lPvq{qoQ+QN{3af~mskHR zG~t4dfn&*<-qGNd8K0uwFk~o7omQh$iJ~dPO{e=8%>W@~;DL7PJa;zcUhCH?+$An{ zOg%H>uM~?0`NfG?#4ft>Jr1du_>>bnxP9``vKz|4Z<)r;gtp-NidJ{tG|dobixZ~V z1-2b5Kb;PB>0o!k?-T?37gz#am}WnS50{O>sVeZ2<_t*!vlG70zz46@8e3s*hEHb= z()N;ih@Oe@WM|mi<{|BZLzl zvZYxkuIak>8|^^dV7gOY*UZ%v+)b>seyg(`_kC<`_XZZD8?~PLM<`VQ^Xww&0PCST zquxA+n{1Lao43+VqpZ5gLuJ<1bQRVm&7S=b_nSIf1~$(by=IFR9Lw0qHml1 EKl{>MKmY&$ delta 1709 zcmZ9NZ){Ul6u|Fo_op4Nd#~F{-T!uO^RXi9oMf94tgdf+=swIq1rr?XOCvEc^@FJ~ zy3TCX)gjDW6XqAF`au?ASZJb)CTk~RC&r8!5~lc}L5OdHVfF(SEIiL`PaE(i@1FBJ z=iGblIrpZWIzy+<(3uNnkCo(x=fhv()~J|=sOZTlQ8q^}*9^&@PMkWl^zuzrR*_F54db&tX5p z0*Ac_3BPzVkq$x0^Y?T;2cR#2&ZC~lM}-r}k0CE3cOYLtZb4o{u0u{CS0QW27096t zG(1z1?|%k4jZCmTeZbG)Hxu|vtE#eGX@ zacd%?T>ZaSD*X~NYODY9&=(a=8-xsgRUAvG5oIl1ElSFo_8oZrqP)y+Bx7X9H@WK@ zA^C)or4w&wCGp5}aCmiE*-#@%ZHV32)KMAck2{iF;3w7guvWo2&;q+aQ08lHTYUfi z06V(W$oKIBe8sIhx<2jO;Ngo8Z1oqL7M%1O_J!cGC!r;Nu~SI}f4{yN&ZeYbvh=Z= zf^GA|210_Du~paoIj=Ux>MTukFYB|oUYNN*e;K5MYD%D*?y-+%emLt>BiFQHutNV5 zd{+VXON%2*g*Naj;B9^Btbd!>JjqKUVzY;>SZeL9l7slv4<6-m_t2JW>ckC`X1^WN zEkDt&@qcuE7J@w)T@Qc^faJ^DZ|iyjB=3I;r0K4%6DaT!yDL7qWf@j=f|Yp7B?*KZ zzN=t^=bIa?Fni%ku~F#}=LqcSgli>FS4!_1`YmO~2BT1JA`M_V4Br7ZCp8;LmYtWH zs!l^f7Z{7d{@5MqmcRm52is^=sT}KTI%+e^&@VpRY_+@JJa5TUTdeW%A88S7?CE4V0#_8>Ob72Bnr5U9e|_jDHfFEgjF$J~qQdf6yY Vq2eg}#Me`i!T%qq_lmFG_#ek*L> 1) + (`NE+2)'(`BIAS); + assign DExp = {2'b00, XExp} - {2'b00, YExp} + (`NE+2)'(`BIAS); + assign calcExp = Sqrt ? SExp[`NE-1:0] : DExp[`NE-1:0]; endmodule @@ -462,11 +462,13 @@ module srtpostproc( end assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0]; assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0]; - assign shiftRem = (intRem >>> (`DIVLEN - dur + 2)); - always_comb - if (Int) Result = intRes >> (`DIVLEN - dur); - else if (Mod) Result = shiftRem[`DIVLEN-1:0]; - else Result = floatRes; + assign shiftRem = (intRem >> (zeroCntD)); + always_comb begin + if (Int) begin + if (Mod) Result = shiftRem[`DIVLEN-1:0]; + else Result = intRes >> (`DIVLEN - dur); + end else Result = floatRes; + end assign calcSign = XSign ^ YSign; endmodule diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 513305b26..1b40c673a 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -53,7 +53,7 @@ module testbench; // Test parameters parameter MEM_SIZE = 40000; - parameter MEM_WIDTH = 64+64+64+64; + parameter MEM_WIDTH = 64+64+64; // Test sizes `define memr 63:0 @@ -70,9 +70,9 @@ module testbench; integer testnum, errors; // Equip Int, Sqrt, or IntMod test - assign Int = 1'b1; + assign Int = 1'b0; assign Mod = 1'b0; - assign Sqrt = 1'b0; + assign Sqrt = 1'b1; // Divider srt srt(.clk, .Start(req), @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("inttestvectors", Tests); + $readmemh ("sqrttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From 95fdd408ee06d7199d2f3675242680e619671fd2 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Fri, 22 Jul 2022 11:14:04 -0700 Subject: [PATCH 083/138] commiting current changes to riscof wally tests --- addins/riscv-arch-test | 2 +- pipelined/testbench/testbench.sv | 31 +++++++++++++------ tests/riscof/sail_cSim/env/model_test.h | 2 +- tests/riscof/sail_cSim/riscof_sail_cSim.py | 16 +++++++--- tests/riscof/spike/env/model_test.h | 2 +- .../riscv-test-suite/env/arch_test.h | 18 +++++------ .../rv32i_m/I/src/WALLY-ADD.S | 2 ++ .../rv32i_m/I/src/WALLY-SLT.S | 3 ++ .../rv32i_m/I/src/WALLY-SLTU.S | 3 ++ .../rv32i_m/I/src/WALLY-SUB.S | 3 ++ .../rv32i_m/I/src/WALLY-XOR.S | 3 ++ .../references/WALLY-trap-01.reference_output | 12 +++---- .../WALLY-trap-s-01.reference_output | 12 +++---- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 13 +++++++- .../rv32i_m/privilege/src/WALLY-amo.S | 3 +- .../rv32i_m/privilege/src/WALLY-clint-01.S | 2 ++ .../privilege/src/WALLY-csr-permission-s-01.S | 3 ++ .../privilege/src/WALLY-csr-permission-u-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-gpio-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-lrsc.S | 3 +- .../rv32i_m/privilege/src/WALLY-mie-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-minfo-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-misa-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-mmu-sv32.S | 3 ++ .../rv32i_m/privilege/src/WALLY-mtvec-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-pma.S | 3 ++ .../rv32i_m/privilege/src/WALLY-pmp.S | 5 +++ .../rv32i_m/privilege/src/WALLY-sie-01.S | 5 ++- .../src/WALLY-status-fp-enabled-01.S | 7 +++++ .../privilege/src/WALLY-status-mie-01.S | 3 ++ .../privilege/src/WALLY-status-sie-01.S | 3 ++ .../privilege/src/WALLY-status-tw-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-stvec-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-trap-01.S | 10 +++--- .../rv32i_m/privilege/src/WALLY-trap-s-01.S | 3 ++ .../privilege/src/WALLY-trap-sret-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-trap-u-01.S | 3 ++ .../rv32i_m/privilege/src/WALLY-wfi-01.S | 3 ++ .../rv64i_m/I/src/WALLY-ADD.S | 3 ++ .../rv64i_m/I/src/WALLY-SLT.S | 3 ++ .../rv64i_m/I/src/WALLY-SLTU.S | 3 ++ .../rv64i_m/I/src/WALLY-SUB.S | 3 ++ .../rv64i_m/I/src/WALLY-XOR.S | 3 ++ .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 4 ++- .../rv64i_m/privilege/src/WALLY-amo.S | 3 +- .../privilege/src/WALLY-csr-permission-s-01.S | 3 ++ .../privilege/src/WALLY-csr-permission-u-01.S | 3 ++ .../rv64i_m/privilege/src/WALLY-lrsc.S | 3 +- .../rv64i_m/privilege/src/WALLY-mie-01.S | 3 +- .../rv64i_m/privilege/src/WALLY-minfo-01.S | 2 ++ .../rv64i_m/privilege/src/WALLY-misa-01.S | 2 ++ .../rv64i_m/privilege/src/WALLY-mmu-sv39.S | 2 ++ .../rv64i_m/privilege/src/WALLY-mmu-sv48.S | 2 ++ .../rv64i_m/privilege/src/WALLY-mtvec-01.S | 2 ++ .../rv64i_m/privilege/src/WALLY-periph.S | 2 ++ .../rv64i_m/privilege/src/WALLY-pma.S | 2 ++ .../rv64i_m/privilege/src/WALLY-pmp.S | 2 ++ .../rv64i_m/privilege/src/WALLY-sie-01.S | 2 ++ .../src/WALLY-status-fp-enabled-01.S | 6 ++++ .../privilege/src/WALLY-status-mie-01.S | 2 ++ .../privilege/src/WALLY-status-sie-01.S | 2 ++ .../privilege/src/WALLY-status-tw-01.S | 2 ++ .../rv64i_m/privilege/src/WALLY-stvec-01.S | 2 ++ .../rv64i_m/privilege/src/WALLY-trap-01.S | 3 +- .../rv64i_m/privilege/src/WALLY-trap-s-01.S | 2 ++ .../privilege/src/WALLY-trap-sret-01.S | 2 ++ .../rv64i_m/privilege/src/WALLY-trap-u-01.S | 2 ++ .../rv64i_m/privilege/src/WALLY-wfi-01.S | 2 ++ 68 files changed, 233 insertions(+), 52 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index be67c99bd..307c77b26 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 +Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 0fb5f5e60..3c886665e 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -179,7 +179,7 @@ logic [3:0] dummy; testadr = 0; testadrNoBase = 0; // riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests - riscofTest = tests[0] == "1"; // | tests[0] == "2"; + riscofTest = tests[0] == "1" | tests[0] == "2"; // fill memory with defined values to reduce Xs in simulation // Quick note the memory will need to be initialized. The C library does not // guarantee the initialized reads. For example a strcmp can read 6 byte @@ -195,13 +195,19 @@ logic [3:0] dummy; /* if (tests[0] == `IMPERASTEST) pathname = tvpaths[0]; else pathname = tvpaths[1]; */ - memfilename = {pathname, tests[test], ".elf.memfile"}; + if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"}; + else memfilename = {pathname, tests[test], ".elf.memfile"}; if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM); else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM); if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM); - ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; - ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; + if (riscofTest) begin + ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"}; + ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"}; + end else begin + ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; + ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; + end // declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array // to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test) updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray); @@ -241,7 +247,8 @@ logic [3:0] dummy; // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking $display("Embench Benchmark: %s is done.", tests[test]); - outputfile = {pathname, tests[test], ".sim.output"}; + if (riscofTest) outputfile = {pathname, tests[test], "/ref/ref.sim.output"}; + else outputfile = {pathname, tests[test], ".sim.output"}; outputFilePointer = $fopen(outputfile); i = 0; while ($unsigned(i) < $unsigned(5'd5)) begin @@ -256,7 +263,7 @@ logic [3:0] dummy; for(i=0; i {2}.log 2>&1;'.format(sig_file, elf, test_name) + + # Check if the tests can be run on SAIL + if ('NO_SAIL=True' in testentry['macros']): + # if the tests can't run on SAIL we copy the reference output to the src directory + reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test)) + execute += 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying + else: + execute += self.sail_exe[self.xlen] + ' -z268435455 -i --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name) cov_str = ' ' for label in testentry['coverage_labels']: diff --git a/tests/riscof/spike/env/model_test.h b/tests/riscof/spike/env/model_test.h index 80101da60..3890c4985 100644 --- a/tests/riscof/spike/env/model_test.h +++ b/tests/riscof/spike/env/model_test.h @@ -14,7 +14,7 @@ #define RVMODEL_HALT \ li x1, 1; \ write_tohost: \ - sw x1, tohost, t5; \ + sw x1, tohost, t0; \ j write_tohost; #define RVMODEL_BOOT diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/env/arch_test.h b/tests/wally-riscv-arch-test/riscv-test-suite/env/arch_test.h index 286a64744..4ae101822 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/env/arch_test.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/env/arch_test.h @@ -11,7 +11,7 @@ // #define rvtest_gpr_save // #endif -#define TEST_CASE_1 +// #define TEST_CASE_1 //----------------------------------------------------------------------- // RV Arch Test Macros @@ -95,14 +95,14 @@ #endif #endif -#if XLEN==64 - #if FLEN==32 - #define SREG sw - #define LREG lW - #define REGWIDTH 4 - #define MASK 0xFFFFFFFF - #endif -#endif +// #if XLEN==64 +// #if FLEN==32 +// #define SREG sw +// #define LREG lW +// #define REGWIDTH 4 +// #define MASK 0xFFFFFFFF +// #endif +// #endif #define MMODE_SIG 3 #define RLENG (REGWIDTH<<3) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-ADD.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-ADD.S index 860df5d2e..9b6561514 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-ADD.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-ADD.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV32I") .section .text.init .globl rvtest_entry_point @@ -27,6 +28,7 @@ RVMODEL_BOOT RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",add) # Testcase 0: rs1:x28(0x00000000), rs2:x13(0x00000000), result rd:x25(0x00000000) li x28, MASK_XLEN(0x00000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLT.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLT.S index dd63389c4..10e5bc509 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLT.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLT.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV32I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",slt) + # Testcase 0: rs1:x11(0x00000000), rs2:x8(0x00000000), result rd:x24(0x00000000) li x11, MASK_XLEN(0x00000000) li x8, MASK_XLEN(0x00000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLTU.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLTU.S index 510438de7..aa080daf9 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLTU.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLTU.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV32I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",sltu) + # Testcase 0: rs1:x20(0x00000000), rs2:x26(0x00000000), result rd:x13(0x00000000) li x20, MASK_XLEN(0x00000000) li x26, MASK_XLEN(0x00000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SUB.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SUB.S index 1d3e81d80..b7f5fa299 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SUB.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SUB.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV32I") .section .text.init .globl rvtest_entry_point @@ -26,6 +27,8 @@ rvtest_entry_point: RVMODEL_BOOT RVTEST_CODE_BEGIN +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",sub) + RVTEST_SIGBASE( x6, wally_signature) # Testcase 0: rs1:x3(0x00000000), rs2:x29(0x00000000), result rd:x28(0x00000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-XOR.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-XOR.S index 900849761..ea658637d 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-XOR.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-XOR.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV32I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",xor) + # Testcase 0: rs1:x14(0x00000000), rs2:x19(0x00000000), result rd:x9(0x00000000) li x14, MASK_XLEN(0x00000000) li x19, MASK_XLEN(0x00000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output index a9c3da2c0..2877f4a8e 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output @@ -6,16 +6,16 @@ 00000000 # mtval of faulting instruction (0x0) 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000003 # mcause from Breakpoint -80000168 # mtval of breakpoint instruction adress +8000015c # mtval of breakpoint instruction adress 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000004 # mcause from load address misaligned -80000171 # mtval of misaligned address +80000165 # mtval of misaligned address 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000005 # mcause from load access 00000000 # mtval of accessed adress (0x0) 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000006 # mcause from store misaligned -80000189 # mtval of address with misaligned store instr +8000017d # mtval of address with misaligned store instr 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000007 # mcause from store access 00000000 # mtval of accessed address (0x0) @@ -62,16 +62,16 @@ fffff7ff # medeleg after attempted write of all 1's (only some bits are writeabl 00000000 # mtval of faulting instruction (0x0) 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000003 # mcause from Breakpoint -80000168 # mtval of breakpoint instruction adress +8000015c # mtval of breakpoint instruction adress 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000004 # mcause from load address misaligned -80000171 # mtval of misaligned address +80000165 # mtval of misaligned address 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000005 # mcause from load access 00000000 # mtval of accessed adress (0x0) 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000006 # mcause from store misaligned -80000189 # mtval of address with misaligned store instr +8000017d # mtval of address with misaligned store instr 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000007 # mcause from store access 00000000 # mtval of accessed address (0x0) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output index c7a9bac6e..089aeba9d 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output @@ -9,16 +9,16 @@ 00000000 # stval of faulting instruction (0x0) 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000003 # scause from Breakpoint -80000168 # stval of breakpoint instruction adress +8000015c # stval of breakpoint instruction adress 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000004 # scause from load address misaligned -80000171 # stval of misaligned address +80000165 # stval of misaligned address 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000005 # scause from load access 00000000 # stval of accessed adress (0x0) 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000006 # scause from store misaligned -80000189 # stval of address with misaligned store instr +8000017d # stval of address with misaligned store instr 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000007 # scause from store access 00000000 # stval of accessed address (0x0) @@ -64,16 +64,16 @@ fffff7ff # medeleg after attempted write of all 1's (only some bits are writeabl 00000000 # stval of faulting instruction (0x0) 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000003 # scause from Breakpoint -80000168 # stval of breakpoint instruction adress +8000015c # stval of breakpoint instruction adress 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000004 # scause from load address misaligned -80000171 # stval of misaligned address +80000165 # stval of misaligned address 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000005 # scause from load access 00000000 # stval of accessed adress (0x0) 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000006 # scause from store misaligned -80000189 # stval of address with misaligned store instr +8000017d # stval of address with misaligned store instr 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000007 # scause from store access 00000000 # stval of accessed address (0x0) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 0caad5d0b..aacb7310b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -26,7 +26,7 @@ .macro INIT_TESTS -RVTEST_ISA("RV32I") +// RVTEST_ISA("RV32I") .section .text.init .globl rvtest_entry_point @@ -152,7 +152,15 @@ cause_s_soft_interrupt: csrs sip, t3 // set supervisor software interrupt pending. SIP is a subset of MIP, so writing this should also change MIP. ret +cause_s_soft_from_m_interrupt: + li t3, 0x2 + csrs mip, t3 // set supervisor software interrupt pending. SIP is a subset of MIP, so writing this should also change MIP. + ret + cause_m_ext_interrupt: + // these interrupts involve a time loop waiting for the interrupt to go off. + // since interrupts are not always enabled, we need to make it stop after a certain number of loops, which is the number in a3 + li a3, 0x40 // ========== Configure PLIC ========== // m priority threshold = 0 li t3, 0xC200000 @@ -189,6 +197,9 @@ m_ext_loop: ret cause_s_ext_interrupt_GPIO: + // these interrupts involve a time loop waiting for the interrupt to go off. + // since interrupts are not always enabled, we need to make it stop after a certain number of loops, which is the number in a3 + li a3, 0x40 // ========== Configure PLIC ========== // s priority threshold = 0 li t3, 0xC201000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-amo.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-amo.S index 4d4d3fee9..8a1906585 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-amo.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-amo.S @@ -24,7 +24,8 @@ #include "model_test.h" #include "arch_test.h" -RVTEST_ISA("RV64I") +RVTEST_ISA("RV32IAF") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*A.*F.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",amo) .section .text.init .globl rvtest_entry_point diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S index 4d201fc9d..69c0bb45f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",clint) #def NO_SAIL=True; INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-s-01.S index 1df8fa249..a43531cc7 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-s-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-s-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",csr-permission-s) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-u-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-u-01.S index b7d3a78f3..ace47a806 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-u-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-csr-permission-u-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",csr-permission-u) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index 1b3bbdb47..5cd029165 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",gpio) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-lrsc.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-lrsc.S index 4078920bd..2ea73ed35 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-lrsc.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-lrsc.S @@ -24,7 +24,8 @@ #include "model_test.h" #include "arch_test.h" -RVTEST_ISA("RV64I") +RVTEST_ISA("RV32IA") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*A.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",lrsc) .section .text.init .globl rvtest_entry_point diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mie-01.S index c48450759..6c91f64fc 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mie-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",mie) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-minfo-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-minfo-01.S index 56bbe5b7c..3a4626b64 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-minfo-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-minfo-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",minfo) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-misa-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-misa-01.S index e1476eaf7..888011ba6 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-misa-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-misa-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",misa) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32.S index df11154dc..3bdd03ad2 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",mmu) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mtvec-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mtvec-01.S index 5c0f70b2d..e2865be3e 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mtvec-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mtvec-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",mtvec) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pma.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pma.S index 1b65a56f9..3000ddcde 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pma.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pma.S @@ -36,6 +36,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",pma) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S index dde8c80fc..cf03edd74 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",pmp) + INIT_TESTS TRAP_HANDLER m @@ -54,6 +57,8 @@ test_cases: # Test 12.3.2.2.1 Config: Write known values and set PMP config according to table 12.4 in the *** riscv book, copied below +.4byte 0x80100300, 0xFEEDBEEF, write32_test + # write pmpaddr regs # | Reg | pmpaddr | pmpcfg | L | A | X | W | R | Comments | .4byte 0x0, 0x0FFFFFFF, write_pmpaddr_0 # | 0 | 0x0FFFFFFF | 1F | 0 | NAPOT | 0 | 1 | 1 | I/O 00000000-7FFFFFFF RW | diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-sie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-sie-01.S index c3af8142e..64c0567cb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-sie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-sie-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",sie) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses @@ -43,7 +46,7 @@ WRITE_READ_CSR sie, 0x0 // force zeroing out mie CSR. jal cause_s_time_interrupt // write this from M mode to attempt to make s time interrupt pending GOTO_S_MODE // this would cause S mode interrupt to fire, but it shouldn't since SIE is zeored out. jal cause_s_soft_interrupt -li a3, 0x40 // these interrupts involve a time loop waiting for the interrupt to go off. +# li a3, 0x40 // these interrupts involve a time loop waiting for the interrupt to go off. // since interrupts are not always enabled, we need to make it stop after a certain number of loops, which is the number in a3 jal cause_s_ext_interrupt_GPIO diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S index 748d84e01..5152e8b1a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S @@ -23,8 +23,15 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32IAF") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*A.*F.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",status-fp-enabled) + INIT_TESTS +// Set the FS bits to 01, This makes sure SAILs mstatus matches wally in the signature +li x29, 0x00002000 +csrw mstatus, x29 + TRAP_HANDLER m // Misa.F is already 1 in this config, making floating point enabled diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-mie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-mie-01.S index 5330afde8..1fc792a98 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-mie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-mie-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",status-mie) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-sie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-sie-01.S index 740e1b04e..81e230273 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-sie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-sie-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",status-sie) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-tw-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-tw-01.S index 61c8fc60c..15155fb76 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-tw-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-status-tw-01.S @@ -24,6 +24,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;", status-tw) + INIT_TESTS TRAP_HANDLER m, EXT_SIGNATURE=1 // turn on recording mtval and status bits on traps diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-stvec-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-stvec-01.S index 03dd7756f..e42ffb6dc 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-stvec-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-stvec-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",stvec) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-01.S index 71f690deb..9385e3547 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",trap) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses @@ -48,7 +51,7 @@ GOTO_S_MODE // Causes U mode ecall GOTO_M_MODE // Causes S mode ecall -jal cause_s_soft_interrupt +jal cause_s_soft_from_m_interrupt jal cause_m_soft_interrupt jal cause_s_time_interrupt jal cause_m_time_interrupt @@ -70,14 +73,11 @@ jal cause_store_addr_misaligned jal cause_store_acc jal cause_ecall // M mode ecall -jal cause_s_soft_interrupt // The delegated S mode interrupts should not fire since we're running in M mode. +jal cause_s_soft_from_m_interrupt // The delegated S mode interrupts should not fire since we're running in M mode. jal cause_m_soft_interrupt jal cause_s_time_interrupt jal cause_m_time_interrupt -li a3, 0x40 // these interrupts involve a time loop waiting for the interrupt to go off. -// since interrupts are not always enabled, we need to make it stop after a certain number of loops, which is the number in a3 jal cause_s_ext_interrupt_GPIO -li a3, 0x40 jal cause_m_ext_interrupt diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-s-01.S index 82787516b..4b1e2afa4 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-s-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-s-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def NO_SAIL=True;",trap-s) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S index 635b09ab9..664eed600 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",trap-sret) + INIT_TESTS TRAP_HANDLER m, EXT_SIGNATURE=1 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S index 90d4fd396..24fa9828c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",trap-u) + INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S index b086809a1..9669c6970 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",wfi) + INIT_TESTS TRAP_HANDLER m, EXT_SIGNATURE=1 // turn on recording mtval and status bits on traps diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-ADD.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-ADD.S index 817693dcb..ed51aca50 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-ADD.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-ADD.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV64I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",add) + # Testcase 0: rs1:x20(0x0000000000000000), rs2:x22(0x0000000000000000), result rd:x3(0x0000000000000000) li x20, MASK_XLEN(0x0000000000000000) li x22, MASK_XLEN(0x0000000000000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S index 76d3fc285..134bfe2aa 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV64I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",slt) + # Testcase 0: rs1:x18(0x0000000000000000), rs2:x9(0x0000000000000000), result rd:x5(0x0000000000000000) li x18, MASK_XLEN(0x0000000000000000) li x9, MASK_XLEN(0x0000000000000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLTU.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLTU.S index 8206bc6f6..d3671a1e5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLTU.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLTU.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV64I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",sltu) + # Testcase 0: rs1:x22(0x0000000000000000), rs2:x23(0x0000000000000000), result rd:x2(0x0000000000000000) li x22, MASK_XLEN(0x0000000000000000) li x23, MASK_XLEN(0x0000000000000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SUB.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SUB.S index 7e82499b3..bd71d8ba0 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SUB.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SUB.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV64I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",sub) + # Testcase 0: rs1:x4(0x0000000000000000), rs2:x23(0x0000000000000000), result rd:x13(0x0000000000000000) li x4, MASK_XLEN(0x0000000000000000) li x23, MASK_XLEN(0x0000000000000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-XOR.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-XOR.S index d34f057ba..b91ba798f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-XOR.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-XOR.S @@ -19,6 +19,7 @@ #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV64I") .section .text.init .globl rvtest_entry_point @@ -28,6 +29,8 @@ RVTEST_CODE_BEGIN RVTEST_SIGBASE( x6, wally_signature) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",xor) + # Testcase 0: rs1:x27(0x0000000000000000), rs2:x22(0x0000000000000000), result rd:x17(0x0000000000000000) li x27, MASK_XLEN(0x0000000000000000) li x22, MASK_XLEN(0x0000000000000000) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index 319f2233b..60872449e 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -26,7 +26,7 @@ .macro INIT_TESTS -RVTEST_ISA("RV64I") +// RVTEST_ISA("RV64I") .section .text.init .globl rvtest_entry_point @@ -156,6 +156,7 @@ cause_s_soft_interrupt: cause_m_ext_interrupt: // ========== Configure PLIC ========== + li a3, 0x40 // m priority threshold = 0 li t3, 0xC200000 li t4, 0 @@ -192,6 +193,7 @@ m_ext_loop: cause_s_ext_interrupt_GPIO: // ========== Configure PLIC ========== + li a3, 0x40 // s priority threshold = 0 li t3, 0xC201000 li t4, 0 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-amo.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-amo.S index b12297902..1d246a54b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-amo.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-amo.S @@ -24,7 +24,8 @@ #include "model_test.h" #include "arch_test.h" -RVTEST_ISA("RV64I") +RVTEST_ISA("RV64IAF") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.A*.F*.*);def TEST_CASE_1=True;",amo) .section .text.init .globl rvtest_entry_point diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-s-01.S index 9b24b6efe..2ff2e99f2 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-s-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-s-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",csr-permission-s) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-u-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-u-01.S index 904e15dbd..848186a98 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-u-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-csr-permission-u-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",csr-permission-u) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-lrsc.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-lrsc.S index b38331a09..3cf332f6b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-lrsc.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-lrsc.S @@ -24,7 +24,8 @@ #include "model_test.h" #include "arch_test.h" -RVTEST_ISA("RV64I") +RVTEST_ISA("RV64IAF") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.A*.F*.*);def TEST_CASE_1=True",lrsc) .section .text.init .globl rvtest_entry_point diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mie-01.S index 84bcabc4f..664e8ce08 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mie-01.S @@ -22,7 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" - +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",mie) INIT_TESTS CAUSE_TRAP_TRIGGERS // initialize code that will cause traps for consistent mtval addresses diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-minfo-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-minfo-01.S index bdfe4b316..5f4ae446a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-minfo-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-minfo-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",minfo) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misa-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misa-01.S index 6aafa0098..9dc0b5c43 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misa-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misa-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",misa) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39.S index de6202c74..2a54d4868 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",mmu-sv39) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48.S index 5de627acf..fafaa242a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",sv48) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mtvec-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mtvec-01.S index 543857560..b192d229c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mtvec-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mtvec-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True; def NO_SAIL=True;",mtvec) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S index 705875146..c4e5a96e6 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S @@ -24,6 +24,8 @@ #include "arch_test.h" RVTEST_ISA("RV64I") +// this test is blocked, it won't build or run. To unblock it remove the check ISA:=regex(BLOCKED); +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True",periph) .section .text.init .globl rvtest_entry_point diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pma.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pma.S index 6d93cd8b6..3048c1ebc 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pma.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pma.S @@ -35,6 +35,8 @@ #define PLIC_RANGE 0x03FFFFFF #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",pma) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S index 7c826ae8f..62e1befe1 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",pmp) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-sie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-sie-01.S index cc681af28..28df4374c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-sie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-sie-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",sie) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S index 6a3b53bea..1d7cb6bec 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S @@ -22,9 +22,15 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64IAF") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*A.*F.*);def TEST_CASE_1=True;def NO_SAIL=True;",status-fp-enabled) INIT_TESTS +// Set the FS bits to 01, This makes sure SAILs mstatus matches wally in the signature +li x29, 0x00002000 +csrw mstatus, x29 + TRAP_HANDLER m // Misa.F is already 1 in this config, making floating point enabled diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-mie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-mie-01.S index c554bbf7a..6fc868984 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-mie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-mie-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",status-mie) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S index a35e85b46..2cd57164b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",status-sie) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S index b84b5103b..8b85c7d64 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S @@ -23,6 +23,8 @@ #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",status-tw) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-stvec-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-stvec-01.S index 100834e36..4a4bdb768 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-stvec-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-stvec-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True; def NO_SAIL=True;",stvec) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-01.S index e3372a062..41d9cd072 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",trap) INIT_TESTS @@ -77,7 +79,6 @@ jal cause_m_time_interrupt li a3, 0x40 // these interrupts involve a time loop waiting for the interrupt to go off. // since interrupts are not always enabled, we need to make it stop after a certain number of loops, which is the number in a3 jal cause_s_ext_interrupt_GPIO -li a3, 0x40 jal cause_m_ext_interrupt diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-s-01.S index 147ad893c..cfe02f3a6 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-s-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-s-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",trap-s) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-sret-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-sret-01.S index 653eacac6..78692fd1c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-sret-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-sret-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",trap-sret) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-u-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-u-01.S index bc07738c9..e10e5d988 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-u-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-trap-u-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",trap-u) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-wfi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-wfi-01.S index cd057c4db..c67d3c24a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-wfi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-wfi-01.S @@ -22,6 +22,8 @@ /////////////////////////////////////////// #include "WALLY-TEST-LIB-64.h" +RVTEST_ISA("RV64I") +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",wfi) INIT_TESTS From cb16a75119028ef7a280ff0390198e18c6ff0ce5 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 12:35:37 -0700 Subject: [PATCH 084/138] Added PLIC test to regression --- pipelined/testbench/tests.vh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index a2145835d..326eb9dac 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1603,8 +1603,8 @@ string wally32i[] = '{ string wally32periph[] = '{ `WALLYTEST, - // "rv32i_m/privilege/WALLY-gpio-01", - // "rv32i_m/privilege/WALLY-clint-01" + "rv32i_m/privilege/WALLY-gpio-01", + "rv32i_m/privilege/WALLY-clint-01", "rv32i_m/privilege/WALLY-plic-01" // "rv32i_m/privilege/WALLY-uart-01" }; From 9cca5671369fa7c6d8330ddc049f5fbb151c8eaa Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 12:35:59 -0700 Subject: [PATCH 085/138] Added test comments to reference output --- .../privilege/references/WALLY-plic-01.reference_output | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output index 8c6b8ef6a..6d3b051b1 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output @@ -214,23 +214,23 @@ 00000003 # claim gives 3 for ID of GPIO 00000400 # check GPIO interrupt pending cleared after claim 00000000 # check no interrupts pending -00000000 # read sip +00000000 # read sip (7.4) 00000408 # check GPIO and UART interrupt pending on intPending0 00000000 # nothing in claim register 00000408 # check GPIO and UART interrupt pending on intPending0 00000000 # check no interrupts pending -00000200 # read sip +00000200 # read sip (7.5) 00000408 # check GPIO and UART interrupt pending on intPending0 00000003 # claim gives 3 for ID of GPIO 00000400 # check GPIO interrupt pending cleared after claim 00000000 # check no interrupts pending -00000000 # read sip +00000000 # read sip (7.6) 00000408 # check GPIO and UART interrupt pending on intPending0 00000000 # nothing in claim register 00000408 # check GPIO and UART interrupt pending on intPending0 00000000 # check no interrupts pending 00000009 # output from ecall in supervisor mode -00000800 # MEIP set +00000800 # MEIP set (8.0) 00000408 # check GPIO and UART interrupt pending on intPending0 0000000A # claim UART 00000008 # GPIO interrupt pending after UART claimcomp From 139e657fcc8d1aed888faafc3784db3d11954fd8 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Fri, 22 Jul 2022 13:52:13 -0700 Subject: [PATCH 086/138] commented out embench test that should be commented out --- pipelined/testbench/tests.vh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index b10bb951b..e30f671fa 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -61,7 +61,6 @@ string tvpaths[] = '{ "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", // "bd_speedopt_speed/src/primecount/primecount", - "bd_speedopt_speed/src/qrduino/qrduino", "bd_speedopt_speed/src/sglib-combined/sglib-combined", "bd_speedopt_speed/src/slre/slre", "bd_speedopt_speed/src/st/st", @@ -82,7 +81,7 @@ string tvpaths[] = '{ "bd_sizeopt_speed/src/nettle-sha256/nettle-sha256", "bd_sizeopt_speed/src/nsichneu/nsichneu", "bd_sizeopt_speed/src/picojpeg/picojpeg", - "bd_sizeopt_speed/src/primecount/primecount", + // "bd_sizeopt_speed/src/primecount/primecount", "bd_sizeopt_speed/src/qrduino/qrduino", "bd_sizeopt_speed/src/sglib-combined/sglib-combined", "bd_sizeopt_speed/src/slre/slre", From 141f2a40e4cb98cb9ac4327c3575e75b70d312e6 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 14:49:03 -0700 Subject: [PATCH 087/138] UART updates and PMA fix --- .../references/WALLY-uart-01.reference_output | 15 +++++++++-- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 1 - .../rv32i_m/privilege/src/WALLY-uart-01.S | 26 ++++++++++++++++++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output index 457bc35c8..899884395 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -15,7 +15,7 @@ 00000101 # Transmit 7 bits 0000007F 00000101 # Transmit 8 bits -00000080 +ffffff80 00000002 # Transmission interrupt tests 00000401 # Interrupt generated by finished transmission 00000004 @@ -27,7 +27,18 @@ 00000000 00000011 00000001 - +00000000 # DSR Test +00000032 +00000001 +00000000 # RI Test +00000034 +00000001 +00000000 # DCD Test +ffffffB8 +00000001 +ffffffC2 # FIFO interrupt +0000C101 +00000000 0000000b # ecall from test termination diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 17ff15c02..cff9ec249 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -954,7 +954,6 @@ read08_test: // address to read in t3, expected 8 bit value in t4 (unused, but there for your perusal). li t2, 0xBAD // bad value that will be overwritten on good reads. lb t2, 0(t3) - andi t2, t2, 0xFF // mask to lower 8 bits sw t2, 0(t1) addi t1, t1, 4 addi a6, a6, 4 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S index 08b1dc25e..82aca9867 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -132,9 +132,33 @@ test_cases: .4byte UART_MSR, 0x00, write08_test # clear MSR .4byte UART_IER, 0x08, write08_test # enable MODEM Status interrupts .4byte UART_IIR, 0x01, read08_test # no interrupts pending -.4byte UART_MCR, 0x02, write08_test # Cause DCTS interrupt +.4byte UART_MCR, 0x12, write08_test # Cause DCTS interrupt .4byte UART_IIR, 0x00, read08_test # MODEM interrupt .4byte UART_MSR, 0x11, read08_test # Read MSR to clear interrupt .4byte UART_IIR, 0x01, read08_test # interrupt cleared by reading MSR +.4byte UART_MCR, 0x13, write08_test # Set DSR high +.4byte UART_IIR, 0x00, read08_test # MODEM interrupt +.4byte UART_MSR, 0x32, read08_test # Read MSR to clear interrupt +.4byte UART_IIR, 0x01, read08_test # Interrupt cleared by reading MSR +.4byte UART_MCR, 0x17, write08_test # Set RIb low and keep CTS and DSR +.4byte UART_MCR, 0x13, write08_test # Set RIb high and keep CTS and DSR +.4byte UART_IIR, 0x00, read08_test # MODEM interrupt +.4byte UART_MSR, 0x34, read08_test # Read MSR to clear interrupt +.4byte UART_IIR, 0x01, read08_test # Interrupt cleared by reading MSR +.4byte UART_MCR, 0x1B, write08_test # Set DCD high and keep CTS and DSR +.4byte UART_IIR, 0x00, read08_test # MODEM interrupt +.4byte UART_MSR, 0xb8, read08_test # Read MSR to clear interrupt +.4byte UART_IIR, 0x01, read08_test # Interrupt cleared by reading MSR +.4byte UART_MCR, 0x10, write08_test # Clear MCR +.4byte UART_MSR, 0x00, write08_test # Clear MSR + +# =========== FIFO interrupts =========== + +.4byte UART_IER, 0x07, write08_test # enable data available, buffer empty, and line status interrupts +.4byte UART_FCR, 0x41, write08_test # Set FIFO threshold to 4 and enable FIFO mode +.4byte UART_IIR, 0xC2, read08_test # Enabling FIFO sets top two bits of IIR +.4byte UART_THR, 0x00, write08_test # write 0 to transmit register +.4byte 0x0, 0xC101, uart_data_wait # no interrupts pending (transmitter interrupt squashed by early read) +.4byte UART_RBR, 0x00, read08_test # read 0 from buffer register .4byte 0x0, 0x0, terminate_test \ No newline at end of file From ec1ed5bd944aedfeb364ee0224ccd91b789f49d6 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 14:55:34 -0700 Subject: [PATCH 088/138] Added UART test to peripheral test --- pipelined/testbench/tests.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index bcb144eee..0c60bb3c9 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1605,7 +1605,7 @@ string wally32i[] = '{ "rv32i_m/privilege/WALLY-gpio-01", "rv32i_m/privilege/WALLY-clint-01", "rv32i_m/privilege/WALLY-plic-01" - // "rv32i_m/privilege/WALLY-uart-01" + "rv32i_m/privilege/WALLY-uart-01" }; From ba2dcf6da4af9e486da4dd4b72b664f28b80ac45 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 14:55:55 -0700 Subject: [PATCH 089/138] fixed error in tests.vh --- pipelined/testbench/tests.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 0c60bb3c9..cc9c75d6a 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1604,7 +1604,7 @@ string wally32i[] = '{ `WALLYTEST, "rv32i_m/privilege/WALLY-gpio-01", "rv32i_m/privilege/WALLY-clint-01", - "rv32i_m/privilege/WALLY-plic-01" + "rv32i_m/privilege/WALLY-plic-01", "rv32i_m/privilege/WALLY-uart-01" }; From 4198145ce281b94c695cec15a2b6676eb13ca7ef Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Fri, 22 Jul 2022 14:58:55 -0700 Subject: [PATCH 090/138] added changes to stvec of reference signatures, modified some tests to copy over reference file instead of running on sail --- pipelined/testbench/tests.vh | 308 +++++++----------- .../rv32i_m/privilege/src/WALLY-plic-01.S | 3 + .../rv32i_m/privilege/src/WALLY-plic-s-01.S | 3 + .../rv32i_m/privilege/src/WALLY-pmp.S | 2 +- .../rv32i_m/privilege/src/WALLY-uart-01.S | 3 + .../references/WALLY-trap-01.reference_output | 12 +- .../WALLY-trap-s-01.reference_output | 12 +- .../WALLY-trap-u-01.reference_output | 12 +- .../rv64i_m/privilege/src/WALLY-pmp.S | 2 +- .../privilege/src/WALLY-status-sie-01.S | 2 +- .../privilege/src/WALLY-status-tw-01.S | 2 +- 11 files changed, 141 insertions(+), 220 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index a2145835d..806709f08 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -34,7 +34,7 @@ string tvpaths[] = '{ "../../addins/imperas-riscv-tests/work/", "../../tests/riscof/work/riscv-arch-test/", - "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", // + "../../tests/riscof/work/wally-riscv-arch-test/", "../../tests/imperas-riscv-tests/work/", "../../benchmarks/coremark/work/", "../../addins/embench-iot/" @@ -93,24 +93,8 @@ string tvpaths[] = '{ "bd_sizeopt_speed/src/wikisort/wikisort" }; - string wally64a[] = '{ - `WALLYTEST, - "rv64i_m/privilege/WALLY-amo", - "rv64i_m/privilege/WALLY-lrsc", - "rv64i_m/privilege/WALLY-status-fp-enabled-01" - }; - - string wally32a[] = '{ - `WALLYTEST, - "rv32i_m/privilege/WALLY-amo", - "rv32i_m/privilege/WALLY-lrsc", - "rv32i_m/privilege/WALLY-status-fp-enabled-01" - - }; - // *** restore CSR tests from Imperas old - - string extra64i[] = '{ + string extra64i[] = '{ `MYIMPERASTEST, "rv64i_m/I/WALLY-ADD", "rv64i_m/I/WALLY-SUB", @@ -920,6 +904,20 @@ string imperas32f[] = '{ "rv32p/WALLY-CSR-PERMISSIONS-S" }; + string wally64a[] = '{ + `WALLYTEST, + "rv64i_m/privilege/src/WALLY-amo.S", + "rv64i_m/privilege/src/WALLY-lrsc.S", + "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S" + }; + + string wally32a[] = '{ + `WALLYTEST, + "rv32i_m/privilege/src/WALLY-amo.S", + "rv32i_m/privilege/src/WALLY-lrsc.S", + "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S" + }; + string arch64priv[] = '{ `RISCVARCHTEST, "rv64i_m/privilege/src/ebreak.S", @@ -1492,212 +1490,126 @@ string imperas32f[] = '{ string wally64i[] = '{ `WALLYTEST, - "rv64i_m/I/WALLY-ADD", - "rv64i_m/I/WALLY-SLT", - "rv64i_m/I/WALLY-SLTU", - "rv64i_m/I/WALLY-SUB", - "rv64i_m/I/WALLY-XOR" + "rv64i_m/I/src/WALLY-ADD.S", + "rv64i_m/I/src/WALLY-SLT.S", + "rv64i_m/I/src/WALLY-SLTU.S", + "rv64i_m/I/src/WALLY-SUB.S", + "rv64i_m/I/src/WALLY-XOR.S" }; + string wally64priv[] = '{ `WALLYTEST, - "rv64i_m/privilege/WALLY-status-tw-01", - "rv64i_m/privilege/WALLY-csr-permission-s-01", - "rv64i_m/privilege/WALLY-csr-permission-u-01", - "rv64i_m/privilege/WALLY-minfo-01", - "rv64i_m/privilege/WALLY-misa-01", - "rv64i_m/privilege/WALLY-mmu-sv39", - "rv64i_m/privilege/WALLY-mmu-sv48", - "rv64i_m/privilege/WALLY-pma", - "rv64i_m/privilege/WALLY-pmp", - "rv64i_m/privilege/WALLY-trap-01", - "rv64i_m/privilege/WALLY-trap-s-01", - "rv64i_m/privilege/WALLY-trap-u-01", - "rv64i_m/privilege/WALLY-mie-01", - "rv64i_m/privilege/WALLY-sie-01", - "rv64i_m/privilege/WALLY-mtvec-01", - "rv64i_m/privilege/WALLY-stvec-01", - "rv64i_m/privilege/WALLY-status-mie-01", - "rv64i_m/privilege/WALLY-status-sie-01", - "rv64i_m/privilege/WALLY-trap-sret-01", - "rv64i_m/privilege/WALLY-status-tw-01", - "rv64i_m/privilege/WALLY-wfi-01" + "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S", + "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S", + "rv64i_m/privilege/src/WALLY-mie-01.S", + "rv64i_m/privilege/src/WALLY-minfo-01.S", + "rv64i_m/privilege/src/WALLY-misa-01.S", + "rv64i_m/privilege/src/WALLY-mmu-sv39.S", + "rv64i_m/privilege/src/WALLY-mmu-sv48.S", + "rv64i_m/privilege/src/WALLY-mtvec-01.S", + "rv64i_m/privilege/src/WALLY-pma.S", + "rv64i_m/privilege/src/WALLY-pmp.S", + "rv64i_m/privilege/src/WALLY-sie-01.S", + "rv64i_m/privilege/src/WALLY-status-mie-01.S", + "rv64i_m/privilege/src/WALLY-status-sie-01.S", + "rv64i_m/privilege/src/WALLY-status-tw-01.S", + "rv64i_m/privilege/src/WALLY-stvec-01.S", + "rv64i_m/privilege/src/WALLY-trap-01.S", + "rv64i_m/privilege/src/WALLY-trap-s-01.S", + "rv64i_m/privilege/src/WALLY-trap-sret-01.S", + "rv64i_m/privilege/src/WALLY-trap-u-01.S", + "rv64i_m/privilege/src/WALLY-wfi-01.S" }; string wally64periph[] = '{ `WALLYTEST, - "rv64i_m/privilege/WALLY-periph" + "rv64i_m/privilege/src/WALLY-periph.S" }; string wally32e[] = '{ `WALLYTEST, - "rv32i_m/I/E-add-01", - "rv32i_m/I/E-addi-01", - "rv32i_m/I/E-and-01", - "rv32i_m/I/E-andi-01", - "rv32i_m/I/E-auipc-01", - "rv32i_m/I/E-bge-01", - "rv32i_m/I/E-bgeu-01", - "rv32i_m/I/E-blt-01", - "rv32i_m/I/E-bltu-01", - "rv32i_m/I/E-bne-01", - "rv32i_m/I/E-jal-01", - "rv32i_m/I/E-jalr-01", - "rv32i_m/I/E-lb-align-01", - "rv32i_m/I/E-lbu-align-01", - "rv32i_m/I/E-lh-align-01", - "rv32i_m/I/E-lhu-align-01", - "rv32i_m/I/E-lui-01", - "rv32i_m/I/E-lw-align-01", - "rv32i_m/I/E-or-01", - "rv32i_m/I/E-ori-01", - "rv32i_m/I/E-sb-align-01", - "rv32i_m/I/E-sh-align-01", - "rv32i_m/I/E-sll-01", - "rv32i_m/I/E-slli-01", - "rv32i_m/I/E-slt-01", - "rv32i_m/I/E-slti-01", - "rv32i_m/I/E-sltiu-01", - "rv32i_m/I/E-sltu-01", - "rv32i_m/I/E-sra-01", - "rv32i_m/I/E-srai-01", - "rv32i_m/I/E-srl-01", - "rv32i_m/I/E-srli-01", - "rv32i_m/I/E-sub-01", - "rv32i_m/I/E-sw-align-01", - "rv32i_m/I/E-xor-01", - "rv32i_m/I/E-xori-01" + "rv32i_m/I/src/E-add-01.S", + "rv32i_m/I/src/E-addi-01.S", + "rv32i_m/I/src/E-and-01.S", + "rv32i_m/I/src/E-andi-01.S", + "rv32i_m/I/src/E-auipc-01.S", + "rv32i_m/I/src/E-bge-01.S", + "rv32i_m/I/src/E-bgeu-01.S", + "rv32i_m/I/src/E-blt-01.S", + "rv32i_m/I/src/E-bltu-01.S", + "rv32i_m/I/src/E-bne-01.S", + "rv32i_m/I/src/E-jal-01.S", + "rv32i_m/I/src/E-jalr-01.S", + "rv32i_m/I/src/E-lb-align-01.S", + "rv32i_m/I/src/E-lbu-align-01.S", + "rv32i_m/I/src/E-lh-align-01.S", + "rv32i_m/I/src/E-lhu-align-01.S", + "rv32i_m/I/src/E-lui-01.S", + "rv32i_m/I/src/E-lw-align-01.S", + "rv32i_m/I/src/E-or-01.S", + "rv32i_m/I/src/E-ori-01.S", + "rv32i_m/I/src/E-sb-align-01.S", + "rv32i_m/I/src/E-sh-align-01.S", + "rv32i_m/I/src/E-sll-01.S", + "rv32i_m/I/src/E-slli-01.S", + "rv32i_m/I/src/E-slt-01.S", + "rv32i_m/I/src/E-slti-01.S", + "rv32i_m/I/src/E-sltiu-01.S", + "rv32i_m/I/src/E-sltu-01.S", + "rv32i_m/I/src/E-sra-01.S", + "rv32i_m/I/src/E-srai-01.S", + "rv32i_m/I/src/E-srl-01.S", + "rv32i_m/I/src/E-srli-01.S", + "rv32i_m/I/src/E-sub-01.S", + "rv32i_m/I/src/E-sw-align-01.S", + "rv32i_m/I/src/E-xor-01.S", + "rv32i_m/I/src/E-xori-01.S" }; -string wally32i[] = '{ + string wally32i[] = '{ `WALLYTEST, - "rv32i_m/I/WALLY-ADD", - "rv32i_m/I/WALLY-SLT", - "rv32i_m/I/WALLY-SLTU", - "rv32i_m/I/WALLY-SUB", - "rv32i_m/I/WALLY-XOR" + "rv32i_m/I/src/WALLY-ADD.S", + "rv32i_m/I/src/WALLY-SLT.S", + "rv32i_m/I/src/WALLY-SLTU.S", + "rv32i_m/I/src/WALLY-SUB.S", + "rv32i_m/I/src/WALLY-XOR.S" }; + string wally32priv[] = '{ `WALLYTEST, - "rv32i_m/privilege/WALLY-csr-permission-s-01", - "rv32i_m/privilege/WALLY-csr-permission-u-01", - "rv32i_m/privilege/WALLY-minfo-01", - "rv32i_m/privilege/WALLY-misa-01", - "rv32i_m/privilege/WALLY-mmu-sv32", - "rv32i_m/privilege/WALLY-pma", - "rv32i_m/privilege/WALLY-pmp", - "rv32i_m/privilege/WALLY-trap-01", - "rv32i_m/privilege/WALLY-trap-s-01", - "rv32i_m/privilege/WALLY-trap-u-01", - "rv32i_m/privilege/WALLY-mie-01", - "rv32i_m/privilege/WALLY-sie-01", - "rv32i_m/privilege/WALLY-mtvec-01", - "rv32i_m/privilege/WALLY-stvec-01", - "rv32i_m/privilege/WALLY-status-mie-01", - "rv32i_m/privilege/WALLY-status-sie-01", - "rv32i_m/privilege/WALLY-trap-sret-01", - "rv32i_m/privilege/WALLY-status-tw-01", - "rv32i_m/privilege/WALLY-wfi-01" + "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S", + "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S", + "rv32i_m/privilege/src/WALLY-mie-01.S", + "rv32i_m/privilege/src/WALLY-minfo-01.S", + "rv32i_m/privilege/src/WALLY-misa-01.S", + "rv32i_m/privilege/src/WALLY-mmu-sv32.S", + "rv32i_m/privilege/src/WALLY-mtvec-01.S", + "rv32i_m/privilege/src/WALLY-pma.S", + "rv32i_m/privilege/src/WALLY-pmp.S", + "rv32i_m/privilege/src/WALLY-sie-01.S", + "rv32i_m/privilege/src/WALLY-status-mie-01.S", + "rv32i_m/privilege/src/WALLY-status-sie-01.S", + "rv32i_m/privilege/src/WALLY-status-tw-01.S", + "rv32i_m/privilege/src/WALLY-stvec-01.S", + "rv32i_m/privilege/src/WALLY-trap-01.S", + "rv32i_m/privilege/src/WALLY-trap-s-01.S", + "rv32i_m/privilege/src/WALLY-trap-sret-01.S", + "rv32i_m/privilege/src/WALLY-trap-u-01.S", + "rv32i_m/privilege/src/WALLY-wfi-01.S" }; string wally32periph[] = '{ `WALLYTEST, - // "rv32i_m/privilege/WALLY-gpio-01", - // "rv32i_m/privilege/WALLY-clint-01" - "rv32i_m/privilege/WALLY-plic-01" - // "rv32i_m/privilege/WALLY-uart-01" + "rv32i_m/privilege/src/WALLY-gpio-01.S", + "rv32i_m/privilege/src/WALLY-clint-01.S", + "rv32i_m/privilege/src/WALLY-uart-01.S", + "rv32i_m/privilege/src/WALLY-plic-01.S" }; -// riscof test paths, to replace existing paths once riscof flow is working -// string wally64a[] = '{ -// `WALLYTEST, -// "rv64i_m/privilege/src/WALLY-amo.S", -// "rv64i_m/privilege/src/WALLY-lrsc.S", -// "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S" -// }; - -// string wally32a[] = '{ -// `WALLYTEST, -// "rv32i_m/privilege/src/WALLY-amo.S", -// "rv32i_m/privilege/src/WALLY-lrsc.S", -// "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S" - -// }; - -// string wally64i[] = '{ -// `WALLYTEST, -// "rv64i_m/I/src/WALLY-ADD.S", -// "rv64i_m/I/src/WALLY-SLT.S", -// "rv64i_m/I/src/WALLY-SLTU.S", -// "rv64i_m/I/src/WALLY-SUB.S", -// "rv64i_m/I/src/WALLY-XOR.S" -// }; - -// string wally64priv[] = '{ -// `WALLYTEST, -// "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S", -// "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S", -// "rv64i_m/privilege/src/WALLY-mie-01.S", -// "rv64i_m/privilege/src/WALLY-minfo-01.S", -// "rv64i_m/privilege/src/WALLY-misa-01.S", -// "rv64i_m/privilege/src/WALLY-mmu-sv39.S", -// "rv64i_m/privilege/src/WALLY-mmu-sv48.S", -// "rv64i_m/privilege/src/WALLY-mtvec-01.S", -// "rv64i_m/privilege/src/WALLY-pma.S", -// "rv64i_m/privilege/src/WALLY-pmp.S", -// "rv64i_m/privilege/src/WALLY-sie-01.S", -// "rv64i_m/privilege/src/WALLY-status-mie-01.S", -// "rv64i_m/privilege/src/WALLY-status-sie-01.S", -// "rv64i_m/privilege/src/WALLY-status-tw-01.S", -// "rv64i_m/privilege/src/WALLY-stvec-01.S", -// "rv64i_m/privilege/src/WALLY-trap-01.S", -// "rv64i_m/privilege/src/WALLY-trap-s-01.S", -// "rv64i_m/privilege/src/WALLY-trap-sret-01.S", -// "rv64i_m/privilege/src/WALLY-trap-u-01.S", -// "rv64i_m/privilege/src/WALLY-wfi-01.S" -// }; - -// string wally64periph[] = '{ -// `WALLYTEST, -// "rv64i_m/privilege/src/WALLY-periph.S" -// }; - - string wally32d[] = '{ `WALLYTEST, "rv32i_m/D/src/WALLY-fld.S" - }; - -// string wally32i[] = '{ -// `WALLYTEST, -// "rv32i_m/I/src/WALLY-ADD.S", -// "rv32i_m/I/src/WALLY-SLT.S", -// "rv32i_m/I/src/WALLY-SLTU.S", -// "rv32i_m/I/src/WALLY-SUB.S", -// "rv32i_m/I/src/WALLY-XOR.S" -// }; - -// string wally32priv[] = '{ -// `WALLYTEST, -// "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S", -// "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S", -// "rv32i_m/privilege/src/WALLY-mie-01.S", -// "rv32i_m/privilege/src/WALLY-minfo-01.S", -// "rv32i_m/privilege/src/WALLY-misa-01.S", -// "rv32i_m/privilege/src/WALLY-mmu-sv32.S", -// "rv32i_m/privilege/src/WALLY-mtvec-01.S", -// "rv32i_m/privilege/src/WALLY-pma.S", -// "rv32i_m/privilege/src/WALLY-pmp.S", -// "rv32i_m/privilege/src/WALLY-sie-01.S", -// "rv32i_m/privilege/src/WALLY-status-mie-01.S", -// "rv32i_m/privilege/src/WALLY-status-sie-01.S", -// "rv32i_m/privilege/src/WALLY-status-tw-01.S", -// "rv32i_m/privilege/src/WALLY-stvec-01.S", -// "rv32i_m/privilege/src/WALLY-trap-01.S", -// "rv32i_m/privilege/src/WALLY-trap-s-01.S", -// "rv32i_m/privilege/src/WALLY-trap-sret-01.S", -// "rv32i_m/privilege/src/WALLY-trap-u-01.S", -// "rv32i_m/privilege/src/WALLY-wfi-01.S" -// }; + }; \ No newline at end of file diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S index 81a48a23f..e6c2ef417 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",plic) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S index 45b87c4b1..f43de1bd3 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",plic-s) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S index cf03edd74..0f93d4086 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-pmp.S @@ -24,7 +24,7 @@ #include "WALLY-TEST-LIB-32.h" RVTEST_ISA("RV32I") -RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",pmp) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",pmp) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S index 08b1dc25e..1547cee43 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -23,6 +23,9 @@ #include "WALLY-TEST-LIB-32.h" +RVTEST_ISA("RV32I") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",uart) + INIT_TESTS TRAP_HANDLER m diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output index 71d93833b..d77998a66 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output @@ -14,13 +14,13 @@ 00000000 00000003 # mcause from Breakpoint 00000000 -80000400 # mtval of breakpoint instruction adress (0x80000400) +800003f4 # mtval of breakpoint instruction adress (0x80000400) 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 00000004 # mcause from load address misaligned 00000000 -80000409 # mtval of misaligned address (0x80000409) +800003fd # mtval of misaligned address (0x80000409) 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -32,7 +32,7 @@ 00000000 00000006 # mcause from store misaligned 00000000 -80000421 # mtval of address with misaligned store instr (0x80000421) +80000415 # mtval of address with misaligned store instr (0x80000421) 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -126,13 +126,13 @@ ffffffff 00000000 00000003 # mcause from Breakpoint 00000000 -80000400 # mtval of breakpoint instruction adress (0x80000400) +800003f4 # mtval of breakpoint instruction adress (0x80000400) 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 00000004 # mcause from load address misaligned 00000000 -80000409 # mtval of misaligned address (0x80000409) +800003fd # mtval of misaligned address (0x80000409) 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -144,7 +144,7 @@ ffffffff 00000000 00000006 # mcause from store misaligned 00000000 -80000421 # mtval of address with misaligned store instr (0x80000421) +80000415 # mtval of address with misaligned store instr (0x80000421) 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output index e837095d4..fe559dfb7 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output @@ -20,13 +20,13 @@ 00000000 00000003 # scause from Breakpoint 00000000 -80000400 # stval of breakpoint instruction adress (0x80000400) +800003f4 # stval of breakpoint instruction adress (0x80000400) 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 00000004 # scause from load address misaligned 00000000 -80000409 # stval of misaligned address (0x80000409) +800003fd # stval of misaligned address (0x80000409) 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -38,7 +38,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000421 # stval of address with misaligned store instr (0x80000421) +80000415 # stval of address with misaligned store instr (0x80000421) 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -130,13 +130,13 @@ ffffffff 00000000 00000003 # scause from Breakpoint 00000000 -80000400 # stval of breakpoint instruction adress (0x80000400) +800003f4 # stval of breakpoint instruction adress (0x80000400) 00000000 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 00000004 # scause from load address misaligned 00000000 -80000409 # stval of misaligned address (0x80000409) +800003fd # stval of misaligned address (0x80000409) 00000000 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 @@ -148,7 +148,7 @@ ffffffff 00000000 00000006 # scause from store misaligned 00000000 -80000421 # stval of address with misaligned store instr (0x80000421) +80000415 # stval of address with misaligned store instr (0x80000421) 00000000 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output index b0fe37c33..39f874ef7 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output @@ -20,13 +20,13 @@ 00000000 00000003 # scause from Breakpoint 00000000 -80000400 # stval of breakpoint instruction adress (0x80000400) +800003f4 # stval of breakpoint instruction adress (0x80000400) 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 00000004 # scause from load address misaligned 00000000 -80000409 # stval of misaligned address (0x80000409) +800003fd # stval of misaligned address (0x80000409) 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -38,7 +38,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000421 # stval of address with misaligned store instr (0x80000421) +80000415 # stval of address with misaligned store instr (0x80000421) 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -116,13 +116,13 @@ ffffffff 00000000 00000003 # scause from Breakpoint 00000000 -80000400 # stval of breakpoint instruction adress (0x80000400) +800003f4 # stval of breakpoint instruction adress (0x80000400) 00000000 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 00000004 # scause from load address misaligned 00000000 -80000409 # stval of misaligned address (0x80000409) +800003fd # stval of misaligned address (0x80000409) 00000000 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 @@ -134,7 +134,7 @@ ffffffff 00000000 00000006 # scause from store misaligned 00000000 -80000421 # stval of address with misaligned store instr (0x80000421) +80000415 # stval of address with misaligned store instr (0x80000421) 00000000 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S index 62e1befe1..fc3864d38 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-pmp.S @@ -23,7 +23,7 @@ #include "WALLY-TEST-LIB-64.h" RVTEST_ISA("RV64I") -RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",pmp) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",pmp) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S index 2cd57164b..e9a9e443b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-sie-01.S @@ -23,7 +23,7 @@ #include "WALLY-TEST-LIB-64.h" RVTEST_ISA("RV64I") -RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",status-sie) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",status-sie) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S index 8b85c7d64..184b022ed 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-status-tw-01.S @@ -24,7 +24,7 @@ #include "WALLY-TEST-LIB-64.h" RVTEST_ISA("RV64I") -RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",status-tw) +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",status-tw) INIT_TESTS From b3d932cd610d60660ce29cccb58ecae8fd392224 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 22 Jul 2022 22:02:04 +0000 Subject: [PATCH 091/138] divider sizes reworked to match book --- addins/riscv-arch-test | 2 +- pipelined/config/rv64fp/wally-config.vh | 2 +- pipelined/config/shared/wally-shared.vh | 4 +- pipelined/regression/wave-fpu.do | 3 +- pipelined/src/fpu/divshiftcalc.sv | 7 +- pipelined/src/fpu/divsqrt.sv | 22 +-- pipelined/src/fpu/fctrl.sv | 6 +- pipelined/src/fpu/flags.sv | 6 +- pipelined/src/fpu/fpu.sv | 4 +- pipelined/src/fpu/otfc.sv | 20 +-- pipelined/src/fpu/postprocess.sv | 6 +- pipelined/src/fpu/qsel.sv | 12 +- pipelined/src/fpu/roundsign.sv | 3 +- pipelined/src/fpu/srt.sv | 171 +++++++++++----------- pipelined/src/fpu/srtfsm.sv | 14 +- pipelined/src/fpu/srtpreproc.sv | 23 +-- pipelined/srt/inttestgen.c | 2 +- pipelined/testbench/testbench-fp.sv | 186 ++++++++++++++---------- 18 files changed, 269 insertions(+), 224 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b26..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index 8f13b2e36..cc8d1b2b8 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index b2abdff7b..ea39ca35d 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,8 +102,9 @@ // division constants `define RADIX 32'h4 -`define DIVCOPIES 32'h1 +`define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) @@ -113,6 +114,7 @@ `define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) +`define DIVb (`FPDUR*`LOGR*`DIVCOPIES)-1 `define USE_SRAM 0 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index b71207e09..e16d7b0b5 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -32,8 +32,9 @@ add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/* # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* -# add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index 3fbc9419d..8095b5171 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,8 +1,9 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`QLEN-1-(`RADIX/4):0] DivQm, + input logic [`DIVb-(`RADIX/4):0] DivQm, input logic [`FMTBITS-1:0] Fmt, + input logic Sqrt, input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`NE+1:0] DivQe, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, @@ -34,8 +35,8 @@ module divshiftcalc( assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~(DivDenormShift[`NE+1]|Sqrt)}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 7ba44a953..70610bcd3 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -34,6 +34,7 @@ module divsqrt( input logic clk, input logic reset, input logic [`FMTBITS-1:0] FmtE, + input logic XsE, input logic [`NF:0] XmE, YmE, input logic [`NE-1:0] XeE, YeE, input logic XInfE, YInfE, @@ -48,23 +49,22 @@ module divsqrt( output logic DivDone, output logic [`NE+1:0] QeM, output logic [`DURLEN-1:0] EarlyTermShiftM, - output logic [`QLEN-1-(`RADIX/4):0] QmM + output logic [`DIVb-(`RADIX/4):0] QmM // output logic [`XLEN-1:0] RemM, ); - logic [`DIVLEN+3:0] NextWSN, NextWCN; - logic [`DIVLEN+3:0] WS, WC; - logic [`DIVLEN+3:0] StickyWSA; - logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [`DIVLEN+3:0] X; - logic [`DIVLEN+3:0] Dpreproc; + logic [`DIVb+3:0] NextWSN, NextWCN; + logic [`DIVb+3:0] WS, WC; + logic [`DIVb+3:0] StickyWSA; + logic [`DIVb:0] X; + logic [`DIVN-2:0] Dpreproc; logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc); - srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .XsE, .SqrtE, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Qm(QmM), .Rem()); + srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM)); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 20e4a0099..934aba2cd 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -178,14 +178,14 @@ module fctrl ( // enables: // X - all except int->fp, store, load, mv int->fp -// Y - all except cvt, mv, load, class +// Y - all except cvt, mv, load, class, sqrt // Z - fma ops only // load/store mv int->fp cvt int->fp assign XEnE = ~(((FResSelE==2'b10)&~FWriteIntE)|((FResSelE==2'b11)&FRegWriteE)|((FResSelE==2'b01)&(PostProcSelE==2'b00)&OpCtrlE[2])); // load/class mv cvt - assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0])))); assign ZEnE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&(~OpCtrlE[2]|OpCtrlE[1]); - assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0])))); assign ZEnForwardE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&~OpCtrlE[2]; // Final Res Sel: diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index c169ab2fa..403b65fe4 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -126,11 +126,11 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero|Invalid); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero); + assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) @@ -163,7 +163,7 @@ module flags( // if dividing by zero and not 0/0 // - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator) - assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn); + assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn); // Combine flags // - to integer results do not set the underflow or overflow flags diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 3e214b0f1..d98079b2e 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -125,7 +125,7 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`QLEN-1-(`RADIX/4):0] QmM; + logic [`DIVb-(`RADIX/4):0] QmM; logic [`NE+1:0] QeE, QeM; logic DivSE, DivSM; logic DivDoneM; @@ -260,7 +260,7 @@ module fpu ( // - fsqrt // *** add other opperations divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .XsE, .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); // compare diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 7ecb823e6..1e794391d 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -32,16 +32,16 @@ module otfc2 ( input logic qp, qz, - input logic [`QLEN-1:0] Q, QM, - output logic [`QLEN-1:0] QNext, QMNext + input logic [`DIVb:0] Q, QM, + output logic [`DIVb:0] QNext, QMNext ); // The on-the-fly converter transfers the quotient // bits to the quotient as they come. // Use this otfc for division only. - logic [`QLEN-2:0] QR, QMR; + logic [`DIVb-1:0] QR, QMR; - assign QR = Q[`QLEN-2:0]; - assign QMR = QM[`QLEN-2:0]; // Shifted Q and QM + assign QR = Q[`DIVb-1:0]; + assign QMR = QM[`DIVb-1:0]; // Shifted Q and QM always_comb begin if (qp) begin @@ -96,8 +96,8 @@ endmodule module otfc4 ( input logic [3:0] q, - input logic [`QLEN-1:0] Q, QM, - output logic [`QLEN-1:0] QNext, QMNext + input logic [`DIVb:0] Q, QM, + output logic [`DIVb:0] QNext, QMNext ); // The on-the-fly converter transfers the quotient @@ -113,7 +113,7 @@ module otfc4 ( // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. - logic [`QLEN-3:0] QR, QMR; + logic [`DIVb-2:0] QR, QMR; // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -122,8 +122,8 @@ module otfc4 ( // else if q = -1 Q = {QM, 11} QM = {QM, 10} // else if q = -2 Q = {QM, 10} QM = {QM, 01} - assign QR = Q[`QLEN-3:0]; - assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM + assign QR = Q[`DIVb-2:0]; + assign QMR = QM[`DIVb-2:0]; // Shifted Q and QM always_comb begin if (q[3]) begin // +2 QNext = {QR, 2'b10}; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 4d9dc310f..003c23d7a 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -60,7 +60,7 @@ module postprocess ( input logic DivS, input logic DivDone, input logic [`NE+1:0] DivQe, - input logic [`QLEN-1-(`RADIX/4):0] DivQm, + input logic [`DIVb-(`RADIX/4):0] DivQm, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -154,7 +154,7 @@ module postprocess ( .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.Fmt, .Sqrt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -199,7 +199,7 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, - .FmaSs, .Xs, .Ys, .CvtCs, .Ms); + .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 87c6a4b25..afb5b1d4b 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -89,17 +89,17 @@ module fgen2 ( endmodule module qsel4 ( - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] WS, WC, + input logic [`DIVN-2:0] D, + input logic [`DIVb+3:0] WS, WC, input logic Sqrt, output logic [3:0] q ); logic [6:0] Wmsbs; logic [7:0] PreWmsbs; logic [2:0] Dmsbs; - assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; + assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4]; assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}}; // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... @@ -177,8 +177,8 @@ module fgen4 ( assign F2 = (~S << 2) & (C << 2); assign F1 = ~(S << 1) & C; assign F0 = '0; - assign FN1 = (SM << 1) | (C & ~(C << 2)); - assign FN2 = (SM << 2) | ((C << 2)&~(C <<4)); + assign FN1 = (SM << 1) | (C & ~(C << 3)); + assign FN2 = (SM << 2) | ((C << 2)&~(C << 4)); // Choose which adder input will be used diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index acecb5947..62e882e65 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -34,6 +34,7 @@ module roundsign( input logic Xs, input logic Ys, input logic FmaNegSum, + input logic Sqrt, input logic FmaOp, input logic DivOp, input logic CvtOp, @@ -44,7 +45,7 @@ module roundsign( logic Qs; - assign Qs = Xs^Ys; + assign Qs = Xs^(Ys&~Sqrt); // Sign for rounding calulation assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 633ac1787..55cde36de 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -37,40 +37,43 @@ module srt( input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, input logic Sqrt, - input logic [`DIVLEN+3:0] X, - input logic [`DIVLEN+3:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic [`DIVb:0] X, + input logic [`DIVN-2:0] Dpreproc, input logic NegSticky, - output logic [`QLEN-1-(`RADIX/4):0] Qm, - output logic [`DIVLEN+3:0] NextWSN, NextWCN, - output logic [`DIVLEN+3:0] StickyWSA, - output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`XLEN-1:0] Rem + output logic [`DIVb-(`RADIX/4):0] Qm, + output logic [`DIVb+3:0] NextWSN, NextWCN, + output logic [`DIVb+3:0] StickyWSA, + output logic [`DIVb+3:0] FirstWS, FirstWC ); - +//QLEN = 1.(number of bits created for division) +// N is NF+1 or XLEN +// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift +// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-1:0 +// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0 +// Q/QM/S/SM should be 1.b so b+1 bits or b:0 +// C needs to be the lenght of the final fraction 0.b so b or b-1:0 /* verilator lint_off UNOPTFLAT */ - logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] S[`DIVCOPIES-1:0]; //***change to QLEN??? - logic [`DIVLEN+3:0] SM[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] SNext[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] SMNext[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] C[`DIVCOPIES-1:0]; + logic [`DIVb+3:0] WSA[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WCA[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WS[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WC[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb:0] Q[`DIVCOPIES-1:0]; // U1.b + logic [`DIVb:0] QM[`DIVCOPIES-1:0];// 1.b + logic [`DIVb:0] QNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] QMNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] S[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] SM[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] SNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] SMNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb-1:0] C[`DIVCOPIES-1:0]; // 0.b /* verilator lint_on UNOPTFLAT */ - logic [`DIVLEN+3:0] WSN, WCN; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [$clog2(`XLEN+1)-1:0] intExp; - logic intSign; - logic [`QLEN-1:0] QMMux; - logic [`DIVLEN+3:0] CMux; - logic [`DIVLEN+3:0] SMux; + logic [`DIVb+3:0] WSN, WCN; // Q4.N-1 + logic [`DIVN-2:0] D; // U0.N-1 + logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1 + logic [`DIVb:0] QMMux; + logic [`DIVb-1:0] CMux; + logic [`DIVb:0] SMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -81,27 +84,28 @@ module srt( // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized if (`RADIX == 2) begin : nextw - assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; - assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+2:0], 1'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+2:0], 1'b0}; end else begin - assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; - assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+1:0], 2'b0}; end - mux2 #(`DIVLEN+4) wsmux(NextWSN, X, DivStart, WSN); - flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); - mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); - flopen #(`DIVLEN+4) dflop(clk, DivStart, Dpreproc, D); - mux2 #(`DIVLEN+4) Cmux({2'b11, C[`DIVCOPIES-1][`DIVLEN+3:2]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, DivStart, CMux); - flop #(`DIVLEN+4) cflop(clk, CMux, C[0]); + mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN); + flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); + mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN); + flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); + flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D); + mux2 #(`DIVb) Cmux({2'b11, C[`DIVCOPIES-1][`DIVb-1:2]}, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux); + flop #(`DIVb) cflop(clk, CMux, C[0]); // Divisor Selections - // - choose the negitive version of what's being selected - assign DBar = ~D; + // - choose the negitive version of what's being selected + // - D is only the fraction + assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}}; if(`RADIX == 4) begin : d2 - assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; - assign D2 = {D[`DIVLEN+2:0], 1'b0}; + assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}}; + assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}}; end genvar i; @@ -112,12 +116,13 @@ module srt( .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); if(i<(`DIVCOPIES-1)) begin if (`RADIX==2)begin - assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; - assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 1'b0}; + assign WS[i+1] = {WSA[i][`DIVb+2:0], 1'b0}; + assign WC[i+1] = {WCA[i][`DIVb+2:0], 1'b0}; + assign C[i+1] = {1'b1, C[i][`DIVb-1:1]}; end else begin - assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; - assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; - assign C[i+1] = {2'b11, C[i][`DIVLEN+3:2]}; + assign WS[i+1] = {WSA[i][`DIVb+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVb+1:0], 2'b0}; + assign C[i+1] = {2'b11, C[i][`DIVb-1:2]}; end assign Q[i+1] = QNext[i]; assign QM[i+1] = QMNext[i]; @@ -128,30 +133,30 @@ module srt( endgenerate // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); - flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); - flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - - flopr #(`DIVLEN+4) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); - mux2 #(`DIVLEN+4) Smux(SNext[`DIVCOPIES-1], {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, DivStart, SMux); - flop #(`DIVLEN+4) Sreg(clk, SMux, S[0]); + mux2 #(`DIVb+1) QMmux(QMNext[`DIVCOPIES-1], '1, DivStart, QMMux); + flopenr #(`DIVb+1) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); + flopen #(`DIVb+1) QMreg(clk, DivBusy, QMMux, QM[0]); + flopr #(`DIVb+1) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); + mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {Sqrt, {(`DIVb){1'b0}}}, DivStart, SMux); + flop #(`DIVb+1) Sreg(clk, SMux, S[0]); + // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted always_comb - if(Sqrt) - if(NegSticky) Qm = SM[0][`QLEN-1-(`RADIX/4):0]; - else Qm = S[0][`QLEN-1-(`RADIX/4):0]; + if(Sqrt) // sqrt ouputs in the range (1, .5] + if(NegSticky) Qm = {SM[0][`DIVb-1-(`RADIX/4):0], 1'b0}; + else Qm = {S[0][`DIVb-1-(`RADIX/4):0], 1'b0}; else - if(NegSticky) Qm = QM[0][`QLEN-1-(`RADIX/4):0]; - else Qm = Q[0][`QLEN-1-(`RADIX/4):0]; + if(NegSticky) Qm = QM[0][`DIVb-(`RADIX/4):0]; + else Qm = Q[0][`DIVb-(`RADIX/4):0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; if(`RADIX==2) if (`DIVCOPIES == 1) - assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; + assign StickyWSA = {WSA[0][`DIVb+2:0], 1'b0}; else - assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; + assign StickyWSA = {WSA[1][`DIVb+2:0], 1'b0}; endmodule @@ -162,24 +167,24 @@ endmodule /* verilator lint_off UNOPTFLAT */ module divinteration ( - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] DBar, D2, DBar2, - input logic [`QLEN-1:0] Q, QM, - input logic [`DIVLEN+3:0] S, SM, - input logic [`DIVLEN+3:0] WS, WC, - input logic [`DIVLEN+3:0] C, + input logic [`DIVN-2:0] D, + input logic [`DIVb+3:0] DBar, D2, DBar2, + input logic [`DIVb:0] Q, QM, + input logic [`DIVb:0] S, SM, + input logic [`DIVb+3:0] WS, WC, + input logic [`DIVb-1:0] C, input logic Sqrt, - output logic [`QLEN-1:0] QNext, QMNext, - output logic [`DIVLEN+3:0] SNext, SMNext, - output logic [`DIVLEN+3:0] WSA, WCA + output logic [`DIVb:0] QNext, QMNext, + output logic [`DIVb:0] SNext, SMNext, + output logic [`DIVb+3:0] WSA, WCA ); /* verilator lint_on UNOPTFLAT */ - logic [`DIVLEN+3:0] Dsel; + logic [`DIVb+3:0] Dsel; logic [3:0] q; - logic qp, qz;//, qn; - logic [`DIVLEN+3:0] F; - logic [`DIVLEN+3:0] AddIn; + logic qp, qz; + logic [`DIVb+3:0] F; + logic [`DIVb+3:0] AddIn; // Qmient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -190,21 +195,21 @@ module divinteration ( // 0010 = -1 // 0001 = -2 if(`RADIX == 2) begin : qsel - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); + qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz); end else begin qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q); - fgen4 fgen4(.s(q), .C, .S, .SM, .F); + // fgen4 fgen4(.s(q), .C, .S, .SM, .F); end if(`RADIX == 2) begin : dsel - assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D); + assign Dsel = {`DIVb+4{~qz}}&(qp ? DBar : {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}); end else begin always_comb case (q) 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; 4'b0000: Dsel = '0; - 4'b0010: Dsel = D; + 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; 4'b0001: Dsel = D2; default: Dsel = 'x; endcase @@ -213,16 +218,16 @@ module divinteration ( // WSA, WCA = WS + WC - qD assign AddIn = Sqrt ? F : Dsel; if (`RADIX == 2) begin : csa - csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); + csa #(`DIVb+4) csa(WS, WC, AddIn, qp, WSA, WCA); end else begin - csa #(`DIVLEN+4) csa(WS, WC, AddIn, |q[3:2], WSA, WCA); + csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~Sqrt, WSA, WCA); end if (`RADIX == 2) begin : otfc otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); end else begin otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); - sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); + // sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); end endmodule diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 597f96cd7..7fe6b6b73 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -33,14 +33,16 @@ module srtfsm( input logic clk, input logic reset, - input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC, + input logic [`DIVb+3:0] NextWSN, NextWCN, WS, WC, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, input logic DivStart, + input logic XsE, + input logic SqrtE, input logic StallE, input logic StallM, - input logic [`DIVLEN+3:0] StickyWSA, + input logic [`DIVb+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivSE, @@ -55,11 +57,11 @@ module srtfsm( logic [`DURLEN-1:0] step; logic WZero; //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; - logic [`DIVLEN+3:0] W; + logic [`DIVb+3:0] W; //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); - assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0}); // calculate sticky bit // - there is a chance that a value is subtracted infinitly, resulting in an exact QM result // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant @@ -70,7 +72,7 @@ module srtfsm( assign DivSE = |W; assign DivDone = (state == DONE); assign W = WC+WS; - assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? + assign NegSticky = W[`DIVb+3]; assign EarlyTermShiftE = step; always_ff @(posedge clk) begin @@ -78,7 +80,7 @@ module srtfsm( state <= #1 IDLE; end else if (DivStart&~StallE) begin step <= Dur; - if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE; + if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE|(XsE&SqrtE)) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index 4d2609179..63b2b9779 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -39,16 +39,16 @@ module srtpreproc ( input logic Sqrt, input logic XZero, output logic [`NE+1:0] QeM, - output logic [`DIVLEN+3:0] X, - output logic [`DIVLEN+3:0] Dpreproc, - output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + output logic [`DIVb:0] X, + output logic [`DIVN-2:0] Dpreproc, output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; logic [`NF-1:0] PreprocA, PreprocX; logic [`NF-1:0] PreprocB, PreprocY; - logic [`NF+3:0] SqrtX; + logic [`NF+1:0] SqrtX; + logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`NE+1:0] Qe; // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; @@ -70,9 +70,9 @@ module srtpreproc ( assign PreprocY = Ym[`NF-1:0]< -#include +#include #include /* Constants */ diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 9be68f507..17383d1f7 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -80,7 +80,7 @@ module testbenchfp; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by - logic [`QLEN-1-(`RADIX/4):0] Quot; + logic [`DIVb-(`RADIX/4):0] Quot; logic CvtResDenormUfE; logic [`DURLEN-1:0] EarlyTermShift; logic DivStart, DivBusy; @@ -256,16 +256,16 @@ module testbenchfp; Fmt = {Fmt, 2'b11}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested - // // add the square-root tests/op-ctrls/unit/fmt - // Tests = {Tests, f128sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b11}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested Tests = {Tests, f128fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -383,16 +383,16 @@ module testbenchfp; Fmt = {Fmt, 2'b01}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f64sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b01}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested Tests = {Tests, f64fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -494,16 +494,16 @@ module testbenchfp; Fmt = {Fmt, 2'b00}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f32sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b00}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f32fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -587,16 +587,16 @@ module testbenchfp; Fmt = {Fmt, 2'b10}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f16sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b10}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f16fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -697,7 +697,7 @@ module testbenchfp; fcmp fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye, .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes), .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes)); - divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(1'b0), .SqrtM(1'b0), + divsqrt divsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp), .EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone); @@ -1007,40 +1007,72 @@ module readvectors ( end endcase `DIVUNIT: - case (Fmt) - 2'b11: begin // quad - X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; - Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; - Ans = TestVector[8+(`Q_LEN-1):8]; - if (~clk) #5; - DivStart = 1'b1; #10 // one clk cycle - DivStart = 1'b0; - end - 2'b01: if (`D_SUPPORTED)begin // double - X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; - Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; - Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; - if (~clk) #5; - DivStart = 1'b1; #10 - DivStart = 1'b0; - end - 2'b00: if (`S_SUPPORTED)begin // single - X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; - Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; - Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; - if (~clk) #5; - DivStart = 1'b1; #10 - DivStart = 1'b0; - end - 2'b10: begin // half - X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; - Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; - Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; - if (~clk) #5; - DivStart = 1'b1; #10 - DivStart = 1'b0; - end - endcase + if(OpCtrl[0]) + case (Fmt) + 2'b11: begin // quad + X = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; + Ans = TestVector[8+(`Q_LEN-1):8]; + if (~clk) #5; + DivStart = 1'b1; #10 // one clk cycle + DivStart = 1'b0; + end + 2'b01: if (`D_SUPPORTED)begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b00: if (`S_SUPPORTED)begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + endcase + else + case (Fmt) + 2'b11: begin // quad + X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; + Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; + Ans = TestVector[8+(`Q_LEN-1):8]; + if (~clk) #5; + DivStart = 1'b1; #10 // one clk cycle + DivStart = 1'b0; + end + 2'b01: if (`D_SUPPORTED)begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; + Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b00: if (`S_SUPPORTED)begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; + Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; + Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + endcase `CMPUNIT: case (Fmt) 2'b11: begin // quad @@ -1259,7 +1291,7 @@ module readvectors ( end assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); - assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)); + assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0])); assign ZEn = (Unit == `FMAUNIT); unpack unpack(.X, .Y, .Z, .Fmt(ModFmt), .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, From 4da96c5791b9b71dad7e8d7129c360905a284d31 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Fri, 22 Jul 2022 15:35:20 -0700 Subject: [PATCH 092/138] fixed 32priv tests, now passing --- .../references/WALLY-trap-u-01.reference_output | 12 ++++++------ .../rv32i_m/privilege/src/WALLY-clint-01.S | 2 +- .../rv32i_m/privilege/src/WALLY-trap-sret-01.S | 2 +- .../rv32i_m/privilege/src/WALLY-trap-u-01.S | 2 +- .../rv32i_m/privilege/src/WALLY-wfi-01.S | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output index 5c6d8378d..22699f42f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output @@ -9,16 +9,16 @@ 00000000 # stval of faulting instruction (0x0) 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000003 # scause from Breakpoint -80000168 # stval of breakpoint instruction adress +8000015c # stval of breakpoint instruction adress 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000004 # scause from load address misaligned -80000171 # stval of misaligned address +80000165 # stval of misaligned address 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000005 # scause from load access 00000000 # stval of accessed adress (0x0) 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000006 # scause from store misaligned -80000189 # stval of address with misaligned store instr +8000017d # stval of address with misaligned store instr 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000007 # scause from store access 00000000 # stval of accessed address (0x0) @@ -57,16 +57,16 @@ fffff7ff # medeleg after attempted write of all 1's (only some bits are writeabl 00000000 # stval of faulting instruction (0x0) 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000003 # scause from Breakpoint -80000168 # stval of breakpoint instruction adress +8000015c # stval of breakpoint instruction adress 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000004 # scause from load address misaligned -80000171 # stval of misaligned address +80000165 # stval of misaligned address 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000005 # scause from load access 00000000 # stval of accessed adress (0x0) 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000006 # scause from store misaligned -80000189 # stval of address with misaligned store instr +8000017d # stval of address with misaligned store instr 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000007 # scause from store access 00000000 # stval of accessed address (0x0) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S index 69c0bb45f..7aac05823 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S @@ -23,7 +23,7 @@ #include "WALLY-TEST-LIB-32.h" RVTEST_ISA("RV32I") -RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",clint) #def NO_SAIL=True; +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",clint) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S index 664eed600..e0f81a2c0 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-sret-01.S @@ -24,7 +24,7 @@ #include "WALLY-TEST-LIB-32.h" RVTEST_ISA("RV32I") -RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",trap-sret) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",trap-sret) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S index 24fa9828c..df812db64 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-trap-u-01.S @@ -24,7 +24,7 @@ #include "WALLY-TEST-LIB-32.h" RVTEST_ISA("RV32I") -RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",trap-u) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",trap-u) INIT_TESTS diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S index 9669c6970..720d83956 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-wfi-01.S @@ -24,7 +24,7 @@ #include "WALLY-TEST-LIB-32.h" RVTEST_ISA("RV32I") -RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;",wfi) +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*); def Drvtest_mtrap_routine=True;def TEST_CASE_1=True;def NO_SAIL=True;",wfi) INIT_TESTS From d0aaae26fe61e9ac4109778c60ed1a48f63695af Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Fri, 22 Jul 2022 17:09:46 -0700 Subject: [PATCH 093/138] fixed wally rv32e tests, updated regression makefile to new testflow --- .gitignore | 1 + pipelined/regression/Makefile | 17 +++++---------- tests/riscof/Makefile | 21 +++++++++++++----- tests/riscof/spike/spike_rv32e_isa.yaml | 29 +++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 16 deletions(-) create mode 100644 tests/riscof/spike/spike_rv32e_isa.yaml diff --git a/.gitignore b/.gitignore index 0c8a1655a..49d1e1603 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ testsBP/*/*.a tests/wally-riscv-arch-test/riscv-test-suite/*/I/*/* tests/riscof/riscof_work/ tests/riscof/config32.ini +tests/riscof/config32e.ini tests/riscof/config64.ini tests/linux-testgen/linux-testvectors/* !tests/linux-testgen/linux-testvectors/tvCopier.py diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 5ad721722..6174464e4 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -1,4 +1,4 @@ -all: archtests wallytests memfiles +all: riscoftests memfiles # *** Build old tests/imperas-riscv-tests for now; # Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test # DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired @@ -19,18 +19,13 @@ allclean: clean all clean: make clean -C ../../tests/riscof - make clean -C ../../tests/wally-riscv-arch-test +# make clean -C ../../tests/wally-riscv-arch-test # make allclean -C ../../tests/imperas-riscv-tests -archtests: - # Build riscv-arch-test 64 and 32-bit versions - make -C ../../tests/riscof/ --jobs - make -C ../../tests/riscof/ XLEN=32 --jobs - -wallytests: - # Build wally-riscv-arch-test - make -C ../../tests/wally-riscv-arch-test/ --jobs - make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs +riscoftests: +# Builds riscv-arch-test 64 and 32-bit versions and builds wally-riscv-arch-test 64 and 32-bit versions + make -C ../../tests/riscof/ + make -C ../../tests/riscof/ XLEN=32 memfiles: make -f makefile-memfile wally-sim-files --jobs diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index fffa0e454..e343d8354 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) XLEN ?= 64 -all: root build_arch #build_wally memfile +all: root build_rv32e build_wally build_arch root: mkdir -p $(work_dir) @@ -25,14 +25,24 @@ fsd_fld_tempfix: build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser - rm -rf $(arch_workdir)/rv$(XLEN)i_m +# rm -rf $(arch_workdir)/rv$(XLEN)i_m rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" rsync -a $(work_dir)/rv64i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" build_wally: - riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt - rm -rf $(wally_workdir)/rv$(XLEN)i_m - mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/ + riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run +# riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt +# rm -rf $(wally_workdir)/rv$(XLEN)i_m +# mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/ + rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" + rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" + +build_rv32e: + sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini + riscof run --work-dir=$(work_dir) --config=config32e.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run +# riscof --verbose debug run --work-dir=$(work_dir) --config=config32e.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt + rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" + rsync -a $(work_dir)/rv32e_unratified/ $(wally_workdir)/rv32e_unratified/ || echo "error suppressed" memfile: find $(work) -type f -name "*.elf" | grep "rv64i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 64 --input "$$f" --output "$$f.memfile"; done @@ -42,6 +52,7 @@ memfile: clean: rm -f config64.ini rm -f config32.ini + rm -f config32e.ini rm -rf $(work_dir) rm -rf $(wally_workdir) rm -rf $(arch_workdir) \ No newline at end of file diff --git a/tests/riscof/spike/spike_rv32e_isa.yaml b/tests/riscof/spike/spike_rv32e_isa.yaml new file mode 100644 index 000000000..7ae54347a --- /dev/null +++ b/tests/riscof/spike/spike_rv32e_isa.yaml @@ -0,0 +1,29 @@ +hart_ids: [0] +hart0: + ISA: RV32EMFCZicsr_Zifencei + physical_addr_sz: 32 + User_Spec_Version: '2.3' + supported_xlen: [32] + misa: + reset-val: 0x40001034 + rv32: + accessible: true + mxl: + implemented: true + type: + warl: + dependency_fields: [] + legal: + - mxl[1:0] in [0x1] + wr_illegal: + - Unchanged + extensions: + implemented: true + type: + warl: + dependency_fields: [] + legal: + - extensions[25:0] bitmask [0x0001034, 0x0000000] + wr_illegal: + - Unchanged + From ca4511b6dc57ca0ba0858f0bf36cdc22cf19e5f7 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 17:13:19 -0700 Subject: [PATCH 094/138] Fixed UART FIFO bugs and added FIFO tests --- pipelined/src/uncore/uartPC16550D.sv | 11 +++++++--- .../references/WALLY-uart-01.reference_output | 13 +++++++++++- .../rv32i_m/privilege/src/WALLY-uart-01.S | 20 +++++++++++++++++++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index 5eabe60c7..dcb04883f 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -324,7 +324,9 @@ module uartPC16550D( if (~PRESETn) begin rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; RXBR <= #1 0; end else begin - if (rxstate == UART_DONE) begin + if (~MEMWb & (A == 3'b010) & Din[1]) begin + rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; + end else if (rxstate == UART_DONE) begin RXBR <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; // load recevive buffer register if (rxoverrunerr) $warning("UART RX Overrun Error\n"); if (rxparityerr) $warning("UART RX Parity Error\n"); @@ -337,7 +339,8 @@ module uartPC16550D( end else if (~MEMRb & A == 3'b000 & ~DLAB) begin // reading RBR updates ready / pops fifo if (fifoenabled) begin if (~rxfifoempty) rxfifotail <= #1 rxfifotail + 1; - if (rxfifoempty) rxdataready <= #1 0; + // if (rxfifoempty) rxdataready <= #1 0; + if (rxfifoentries == 1) rxdataready <= #1 0; // When reading the last entry, data ready becomes zero end else begin rxdataready <= #1 0; RXBR <= #1 {1'b0, RXBR[9:0]}; // Ben 31 March 2022: I added this so that rxoverrunerr permanently goes away upon reading RBR (when not in FIFO mode) @@ -448,6 +451,8 @@ module uartPC16550D( always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) begin txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff; + end else if (~MEMWb & (A == 3'b010) & Din[2]) begin + txfifohead <= #1 0; txfifotail <= #1 0; end else begin if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo if (fifoenabled) begin @@ -461,7 +466,7 @@ module uartPC16550D( end if (txstate == UART_IDLE) begin // move data into tx shift register if available if (fifoenabled) begin - if (~txfifoempty) begin + if (~txfifoempty & ~txsrfull) begin txsr <= #1 txdata; txfifotail <= #1 txfifotail+1; txsrfull <= #1 1; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output index 899884395..cfe54cb1c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -39,6 +39,17 @@ ffffffB8 ffffffC2 # FIFO interrupt 0000C101 00000000 - +ffffffC1 +0000C401 +ffffffA5 +ffffffC1 +00000001 +00000002 +00000061 +00000003 +00000060 +0000C101 +ffffffC1 +00000060 0000000b # ecall from test termination diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S index 82aca9867..6d8f70660 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -160,5 +160,25 @@ test_cases: .4byte UART_THR, 0x00, write08_test # write 0 to transmit register .4byte 0x0, 0xC101, uart_data_wait # no interrupts pending (transmitter interrupt squashed by early read) .4byte UART_RBR, 0x00, read08_test # read 0 from buffer register +.4byte UART_THR, 0xA5, write08_test # Write A5 to transmit register +.4byte UART_THR, 0x01, write08_test # Write 1 to transmit register +.4byte UART_IIR, 0xC1, read08_test # no interrupts pending +.4byte UART_THR, 0x02, write08_test # Write 2 to transmit register +.4byte UART_THR, 0x03, write08_test # Write 3 to transmit register +.4byte 0x0, 0xC401, uart_data_wait # Interrupt due to data ready +.4byte UART_RBR, 0xA5, read08_test # Read A5 from buffer register +.4byte UART_IIR, 0xC2, read08_test # Data ready interrupt cleared +.4byte UART_RBR, 0x01, read08_test # Read 1 from buffer register +.4byte UART_RBR, 0x02, read08_test # Read 2 from buffer register +.4byte UART_LSR, 0x61, read08_test # Data ready, 1 item left in FIFO +.4byte UART_RBR, 0x03, read08_test # Read 3 from buffer register +.4byte UART_LSR, 0x60, read08_test # No data ready, FIFO is empty +.4byte UART_THR, 0xFF, write08_test # Write FF to transmit register +.4byte UART_THR, 0xFE, write08_test # Write FE to transmit register +.4byte 0x0, 0xC101, uart_data_wait # Interrupt due to data ready +.4byte UART_FCR, 0xC7, write08_test # Clear all bytes in FIFO +.4byte UART_FCR, 0xC1, read08_test # Check that FCR clears bits 1 and 2 when written to 1 +.4byte UART_LSR, 0x60, read08_test # No data ready, FIFO cleared by writing to FCR + .4byte 0x0, 0x0, terminate_test \ No newline at end of file From 655e2d381041cc97216c9995fefbcee3a9804b23 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Sat, 23 Jul 2022 00:41:18 +0000 Subject: [PATCH 095/138] merged radix-2 sqrt into divider - doesnt work yet --- addins/riscv-arch-test | 2 +- pipelined/src/fpu/otfc.sv | 31 +++++++++++++------------------ pipelined/src/fpu/qsel.sv | 26 +++++++++++++++----------- pipelined/src/fpu/srt.sv | 21 ++++++++++++++++++++- 4 files changed, 49 insertions(+), 31 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index be67c99bd..e5020bf7b 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 +Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545 diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 1e794391d..71320fedf 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -62,36 +62,31 @@ endmodule // Square Root OTFC, Radix 2 // /////////////////////////////// module sotfc2( - input logic clk, - input logic Start, - input logic sp, sn, - input logic Sqrt, - input logic [`DIVLEN+3:0] C, - output logic [`DIVLEN-2:0] Sq, - output logic [`DIVLEN+3:0] S, SM + input logic sp, sz, + input logic [`DIVb-1:0] C, + input logic [`DIVb:0] S, SM, + output logic [`DIVb:0] SNext, SMNext ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. // Use this otfc for division and square root. - logic [`DIVLEN+3:0] SNext, SMNext, SMux; + logic [`DIVb:0] CExt; - flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); - flop #(`DIVLEN+4) Sreg(clk, SMux, S); + assign CExt = {1'b1, C}; always_comb begin if (sp) begin - SNext = S | (C & ~(C << 1)); + SNext = S | (CExt & ~(CExt << 1)); SMNext = S; - end else if (sn) begin - SNext = SM | (C & ~(C << 1)); - SMNext = SM; - end else begin // If sp and sn are not true, then sz is + end else if (sz) begin SNext = S; - SMNext = SM | (C & ~(C << 1)); + SMNext = SM | (CExt & ~(CExt << 1)); + end else begin // If sp and sz are not true, then sn is + SNext = SM | (CExt & ~(CExt << 1)); + SMNext = SM; end end - assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; + endmodule module otfc4 ( diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index afb5b1d4b..cb8d3202b 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -66,25 +66,29 @@ endmodule // Adder Input Generation, Radix 2 // //////////////////////////////////// module fgen2 ( - input logic sp, sn, - input logic [`DIVLEN+3:0] C, S, SM, - output logic [`DIVLEN+3:0] F + input logic sp, sz, + input logic [`DIVb-1:0] C, + input logic [`DIVb:0] S, SM, + output logic [`DIVb+3:0] F ); - logic [`DIVLEN+3:0] FP, FN, FZ; - + logic [`DIVb+3:0] FP, FN, FZ; + logic [`DIVb+3:0] SExt, SMExt, CExt; + + assign SExt = {3'b0, S}; + assign SMExt = {3'b0, SM}; + assign CExt = {4'hf, C}; + // Generate for both positive and negative bits - assign FP = ~(S << 1) & C; - assign FN = (SM << 1) | (C & (~C << 2)); + assign FP = ~(SExt << 1) & CExt; + assign FN = (SMExt << 1) | (CExt & (~CExt << 2)); assign FZ = '0; // Choose which adder input will be used always_comb if (sp) F = FP; - else if (sn) F = FN; - else F = FZ; - - // assign F = sp ? FP : (sn ? FN : FZ); + else if (sz) F = FZ; + else F = FN; endmodule diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 55cde36de..db2abf25a 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -72,6 +72,7 @@ module srt( logic [`DIVN-2:0] D; // U0.N-1 logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1 logic [`DIVb:0] QMMux; + logic [`DIVb-1:0] NextC; logic [`DIVb-1:0] CMux; logic [`DIVb:0] SMux; @@ -86,11 +87,22 @@ module srt( if (`RADIX == 2) begin : nextw assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+2:0], 1'b0}; assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+2:0], 1'b0}; + assign NextC = {1'b1, C[`DIVCOPIES-1][`DIVb-1:1]}; end else begin assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+1:0], 2'b0}; assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+1:0], 2'b0}; + assign NextC = {2'b11, C[`DIVCOPIES-1][`DIVb-1:2]}; end + +// mux2 #(`DIVb+4) wsmux(NextWSN, {{3{Sqrt}}, X}, DivStart, WSN); //*** modified for sqrt which doesnt work +// flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); +// mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN); +// flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); +// flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D); +// mux2 #(`DIVb) Cmux(NextC, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux); +// flop #(`DIVb) cflop(clk, CMux, C[0]); + mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN); flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN); @@ -132,6 +144,7 @@ module srt( end endgenerate + // if starting a new divison set Q to 0 and QM to -1 mux2 #(`DIVb+1) QMmux(QMNext[`DIVCOPIES-1], '1, DivStart, QMMux); flopenr #(`DIVb+1) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); @@ -196,6 +209,7 @@ module divinteration ( // 0001 = -2 if(`RADIX == 2) begin : qsel qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz); + fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F); end else begin qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q); // fgen4 fgen4(.s(q), .C, .S, .SM, .F); @@ -218,13 +232,14 @@ module divinteration ( // WSA, WCA = WS + WC - qD assign AddIn = Sqrt ? F : Dsel; if (`RADIX == 2) begin : csa - csa #(`DIVb+4) csa(WS, WC, AddIn, qp, WSA, WCA); + csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~Sqrt, WSA, WCA); end else begin csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~Sqrt, WSA, WCA); end if (`RADIX == 2) begin : otfc otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); + sotfc2 sotfc2(.sp(qp), .sz(qz), .C, .S, .SM, .SNext, .SMNext); end else begin otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); // sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); @@ -254,3 +269,7 @@ module csa #(parameter N=69) ( assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | (in2[N-2:0] & in3[N-2:0]), cin}; endmodule + + + + From abc79c6c8ece7c3cc3f9619aed46b2ac43a20f9b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 22 Jul 2022 22:42:39 -0500 Subject: [PATCH 096/138] Possible improvement to cache which removes the cpu_busy states. --- pipelined/regression/wave.do | 424 ++++++++++++++++--------------- pipelined/src/cache/cache.sv | 6 +- pipelined/src/cache/cachefsm.sv | 18 +- pipelined/src/cache/cacheway.sv | 7 +- pipelined/src/cache/sram1p1rw.sv | 7 +- 5 files changed, 237 insertions(+), 225 deletions(-) diff --git a/pipelined/regression/wave.do b/pipelined/regression/wave.do index d11c248fd..eac613075 100644 --- a/pipelined/regression/wave.do +++ b/pipelined/regression/wave.do @@ -5,43 +5,44 @@ add wave -noupdate /testbench/reset add wave -noupdate /testbench/reset_ext add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/dut/core/SATP_REGW -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/BPPredWrongE -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/RetM -add wave -noupdate -group HDU -group hazards -color Pink /testbench/dut/core/hzu/TrapM -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/LoadStallD -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/StoreStallD -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/LSUStallM -add wave -noupdate -group HDU -group hazards /testbench/dut/core/MDUStallD -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/DivBusyE -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InterruptM -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallF -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallD -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW -add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/InstrD -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/InstrE -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/core/ifu/InstrM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM +add wave -noupdate -expand -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/ifu/IFUStallF +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/MDUStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InterruptM +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW +add wave -noupdate -group {instruction pipeline} /testbench/InstrFName +add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF +add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD +add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrE +add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrM add wave -noupdate -group {Decode Stage} /testbench/dut/core/ifu/PCD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ifu/InstrD add wave -noupdate -group {Decode Stage} /testbench/InstrDName @@ -190,177 +191,177 @@ add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED -add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState -add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW -add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall -add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM -add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM -add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM -add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM -add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr -add wave -noupdate -expand -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState -add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/BusStall -add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead -add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite -add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr -add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck -add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA -add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA -add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} -add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} -add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay -add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM -add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress -add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault -add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM -add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM -add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr -add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal -add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM -add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM -add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM -add wave -noupdate -expand -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE -add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF -add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM +add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState +add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW +add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall +add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM +add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM +add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM +add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM +add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr +add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall +add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead +add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite +add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr +add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck +add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA +add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA +add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} +add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} +add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr +add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay +add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM +add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress +add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault +add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM +add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM +add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr +add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal +add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM +add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM +add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM +add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState +add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF +add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE +add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr +add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE +add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF +add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intClaim @@ -516,8 +517,11 @@ add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/Nex add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/UpdatePTE add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/FinalByteMask} add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/SelectedWriteWordEn} +add wave -noupdate /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushAdrFlag +add wave -noupdate /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushWayFlag +add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/InvalidateCacheM TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {307575 ns} 1} {{Cursor 2} {307965 ns} 1} {{Cursor 3} {112734 ns} 0} +WaveRestoreCursors {{Cursor 2} {989221 ns} 1} {{Cursor 3} {999815 ns} 1} {{Cursor 4} {993418 ns} 0} quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 314 @@ -533,4 +537,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {112550 ns} {112886 ns} +WaveRestoreZoom {993352 ns} {993510 ns} diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 62b74a8b6..741044dae 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -109,6 +109,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic save, restore; logic SelBusBuffer; + logic SRAMEnable; localparam LOGXLENBYTES = $clog2(`XLEN/8); logic [2**LOGWPL-1:0] MemPAdrDecoded; @@ -127,7 +128,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) - CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .FStore2, + CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .FStore2, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Invalidate(InvalidateCacheM)); @@ -229,6 +230,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER .SetValid, .SelEvict, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelBusBuffer, - .save, .restore, + .InvalidateCache(InvalidateCacheM), + .save, .restore, .SRAMEnable, .LRUWriteEn); endmodule diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index b877bd184..e73f723bc 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -37,6 +37,7 @@ module cachefsm input logic [1:0] CacheRW, input logic [1:0] CacheAtomic, input logic FlushCache, + input logic InvalidateCache, // hazard inputs input logic CPUBusy, // interlock fsm @@ -74,9 +75,10 @@ module cachefsm output logic FlushWayCntEn, output logic FlushAdrCntRst, output logic FlushWayCntRst, - output logic SelBusBuffer, + output logic SelBusBuffer, output logic save, - output logic restore); + output logic restore, + output logic SRAMEnable); logic resetDelay; logic AMO; @@ -117,7 +119,7 @@ module cachefsm assign DoRead = CacheRW[1] & ~IgnoreRequest; assign DoWrite = CacheRW[0] & ~IgnoreRequest; - assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit; + assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit & ~InvalidateCache; assign DoAnyUpdateHit = (DoAMO | DoWrite) & CacheHit; assign DoAnyHit = DoAnyUpdateHit | (DoRead & CacheHit); assign FlushFlag = FlushAdrFlag & FlushWayFlag; @@ -138,15 +140,15 @@ module cachefsm always_comb begin NextState = STATE_READY; case (CurrState) - STATE_READY: if(IgnoreRequest) NextState = STATE_READY; + STATE_READY: if(IgnoreRequest | InvalidateCache) NextState = STATE_READY; else if(DoFlush) NextState = STATE_FLUSH; - else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY; - else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // fetch first, then eviction if necessary. see delay in lru read/write path. + //else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY; + else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // fetch first, then eviction is necessary. see delay in lru read/write path. else NextState = STATE_READY; STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; else NextState = STATE_MISS_FETCH_WDV; - STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; + STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; // cpu_busy not needed. load misses have the property of reading from the bus buffer rather than sram. STATE_MISS_READ_WORD: if(CacheRW[0] & ~AMO) NextState = STATE_MISS_WRITE_WORD; else NextState = STATE_MISS_READ_WORD_DELAY; STATE_MISS_READ_WORD_DELAY: if(CPUBusy) NextState = STATE_CPU_BUSY; @@ -237,5 +239,7 @@ module cachefsm resetDelay; assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE; + assign SRAMEnable = (CurrState == STATE_READY & ~CPUBusy | CacheStall) | (CurrState != STATE_READY) | reset; + //assign SRAMEnable = 1; endmodule // cachefsm diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 9734d8acc..0fbe4adbc 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -33,6 +33,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) ( input logic clk, + input logic ce, input logic reset, input logic [$clog2(NUMLINES)-1:0] RAdr, @@ -98,7 +99,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // Tag Array ///////////////////////////////////////////////////////////////////////////////////////////// - sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, + sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce, .Adr(RAdr), .ReadData(ReadTag), .ByteMask('1), .CacheWriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(SetValidWay)); @@ -120,7 +121,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, localparam integer LOGNUMSRAM = $clog2(NUMSRAM); for(words = 0; words < NUMSRAM; words++) begin: word - sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .Adr(RAdr), + sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .Adr(RAdr), .ReadData(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), .CacheWriteData(CacheWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), //.WriteEnable(1'b1), .ByteMask(SRAMLineByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); @@ -140,7 +141,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, else if (SetValidWay) ValidBits[RAdr] <= #1 1'b1; else if (ClearValidWay) ValidBits[RAdr] <= #1 1'b0; end - flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); + flopen #($clog2(NUMLINES)) RAdrDelayReg(clk, ce, RAdr, RAdrD); assign Valid = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/cache/sram1p1rw.sv b/pipelined/src/cache/sram1p1rw.sv index 49bf5d852..ca1b07437 100644 --- a/pipelined/src/cache/sram1p1rw.sv +++ b/pipelined/src/cache/sram1p1rw.sv @@ -37,6 +37,7 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( input logic clk, + input logic ce, input logic [$clog2(DEPTH)-1:0] Adr, input logic [WIDTH-1:0] CacheWriteData, input logic WriteEnable, @@ -46,7 +47,7 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( logic [WIDTH-1:0] StoredData[DEPTH-1:0]; logic [$clog2(DEPTH)-1:0] AdrD; - always_ff @(posedge clk) AdrD <= Adr; + always_ff @(posedge clk) if(ce) AdrD <= Adr; genvar index; @@ -66,13 +67,13 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( end else begin if (WIDTH%8 != 0) // handle msbs if not a multiple of 8 always_ff @(posedge clk) - if (WriteEnable & ByteMask[WIDTH/8]) + if (ce & WriteEnable & ByteMask[WIDTH/8]) StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1 CacheWriteData[WIDTH-1:WIDTH-WIDTH%8]; for(index = 0; index < WIDTH/8; index++) always_ff @(posedge clk) - if(WriteEnable & ByteMask[index]) + if(ce & WriteEnable & ByteMask[index]) StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 +: 8]; assign ReadData = StoredData[AdrD]; From 17ae1a1b1bad86d452f5759939eca67a860b4517 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 22 Jul 2022 23:25:09 -0500 Subject: [PATCH 097/138] cache fsm cleanup after removal of replay. --- pipelined/src/cache/cachefsm.sv | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index e73f723bc..84c47b1c1 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -92,13 +92,7 @@ module cachefsm STATE_MISS_FETCH_WDV, STATE_MISS_EVICT_DIRTY_START, STATE_MISS_EVICT_DIRTY, - STATE_MISS_EVICT_DIRTY_DONE, STATE_MISS_WRITE_CACHE_LINE, - STATE_MISS_READ_WORD, - STATE_MISS_READ_WORD_DELAY, - STATE_MISS_WRITE_WORD, - // cpu stalled replay/restore state - STATE_CPU_BUSY, // flush cache STATE_FLUSH, STATE_FLUSH_CHECK, @@ -142,24 +136,15 @@ module cachefsm case (CurrState) STATE_READY: if(IgnoreRequest | InvalidateCache) NextState = STATE_READY; else if(DoFlush) NextState = STATE_FLUSH; - //else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY; else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // fetch first, then eviction is necessary. see delay in lru read/write path. else NextState = STATE_READY; STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; else NextState = STATE_MISS_FETCH_WDV; STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; // cpu_busy not needed. load misses have the property of reading from the bus buffer rather than sram. - STATE_MISS_READ_WORD: if(CacheRW[0] & ~AMO) NextState = STATE_MISS_WRITE_WORD; - else NextState = STATE_MISS_READ_WORD_DELAY; - STATE_MISS_READ_WORD_DELAY: if(CPUBusy) NextState = STATE_CPU_BUSY; - else NextState = STATE_READY; - STATE_MISS_WRITE_WORD: if(CPUBusy) NextState = STATE_CPU_BUSY; - else NextState = STATE_READY; STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_EVICT_DIRTY; STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. - STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY; - else NextState = STATE_READY; STATE_FLUSH: NextState = STATE_FLUSH_CHECK; STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; else if(FlushFlag) NextState = STATE_READY; @@ -182,7 +167,6 @@ module cachefsm (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. - (CurrState == STATE_MISS_READ_WORD) | (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) | (CurrState == STATE_FLUSH_INCR) | @@ -191,7 +175,6 @@ module cachefsm // write enables internal to cache assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) | - (CurrState == STATE_MISS_READ_WORD_DELAY & AMO) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0])); assign ClearValid = '0; assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | @@ -216,10 +199,8 @@ module cachefsm assign CacheWriteLine = (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); // handle cpu stall. - assign restore = ((CurrState == STATE_CPU_BUSY)) & ~`REPLAY; - assign save = ((CurrState == STATE_READY & DoAnyHit & CPUBusy) | - (CurrState == STATE_MISS_READ_WORD_DELAY & (AMO | CacheRW[1]) & CPUBusy) | - (CurrState == STATE_MISS_WRITE_WORD & DoWrite & CPUBusy)) & ~`REPLAY; + assign restore = '0; + assign save = '0; // **** can this be simplified? assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss. @@ -231,11 +212,6 @@ module cachefsm (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | - (CurrState == STATE_MISS_READ_WORD) | - (CurrState == STATE_MISS_READ_WORD_DELAY & (AMO | (CPUBusy & `REPLAY))) | - (CurrState == STATE_MISS_WRITE_WORD) | - - (CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) | resetDelay; assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE; From 27e32980adc2f3b956c3b9d2fa7aa7c6379e22a7 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 22 Jul 2022 23:29:14 -0500 Subject: [PATCH 098/138] cache cleanup after removing replay on cpubusy. --- pipelined/src/cache/cache.sv | 20 +++++--------------- pipelined/src/cache/cachefsm.sv | 6 ------ pipelined/src/cache/subcachelineread.sv | 12 +----------- 3 files changed, 6 insertions(+), 32 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 741044dae..dbb69e464 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -79,7 +79,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic ClearValid; logic ClearDirty; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; - logic [NUMWAYS-1:0] HitWay, HitWaySaved, HitWayFinal; + logic [NUMWAYS-1:0] HitWay; logic CacheHit; logic SetDirty; logic SetValid; @@ -107,7 +107,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic [1:0] CacheRW, CacheAtomic; logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; - logic save, restore; logic SelBusBuffer; logic SRAMEnable; @@ -134,7 +133,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER .Invalidate(InvalidateCacheM)); if(NUMWAYS > 1) begin:vict cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( - .clk, .reset, .HitWay(HitWayFinal), .VictimWay, .RAdr, .LRUWriteEn); + .clk, .reset, .HitWay, .VictimWay, .RAdr, .LRUWriteEn); end else assign VictimWay = 1'b1; // one hot. assign CacheHit = | HitWay; assign VictimDirty = | VictimDirtyWay; @@ -144,15 +143,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache)); or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); - // Because of the sram clocked read when the ieu is stalled the read data maybe lost. - // There are two ways to resolve. 1. We can replay the read of the sram or we can save - // the data. Replay is eaiser but creates a longer critical path. - // save/restore only wayhit and readdata. - if(!`REPLAY) begin - flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, HitWay, HitWaySaved); - mux2 #(NUMWAYS) saverestoremux(HitWay, HitWaySaved, restore, HitWayFinal); - end else assign HitWayFinal = HitWay; - // like to fix this. if(DCACHE) mux2 #(LOGWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), @@ -163,7 +153,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER mux2 #(LINELEN) EarlyReturnBuf(ReadDataLineCache, CacheBusWriteData, SelBusBuffer, ReadDataLine); subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL, LOGWPL) subcachelineread( - .clk, .reset, .PAdr(WordOffsetAddr), .save, .restore, + .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord); ///////////////////////////////////////////////////////////////////////////////////////////// @@ -210,7 +200,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write Enables ///////////////////////////////////////////////////////////////////////////////////////////// - mux3 #(NUMWAYS) selectwaymux(HitWayFinal, VictimWay, FlushWay, + mux3 #(NUMWAYS) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelectedWay); assign SetValidWay = SetValid ? SelectedWay : '0; assign ClearValidWay = ClearValid ? SelectedWay : '0; @@ -231,6 +221,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelBusBuffer, .InvalidateCache(InvalidateCacheM), - .save, .restore, .SRAMEnable, + .SRAMEnable, .LRUWriteEn); endmodule diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 84c47b1c1..46f7825b6 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -76,8 +76,6 @@ module cachefsm output logic FlushAdrCntRst, output logic FlushWayCntRst, output logic SelBusBuffer, - output logic save, - output logic restore, output logic SRAMEnable); logic resetDelay; @@ -198,10 +196,6 @@ module cachefsm assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss); assign CacheWriteLine = (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); - // handle cpu stall. - assign restore = '0; - assign save = '0; - // **** can this be simplified? assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss. // use the raw requests as we don't want IgnoreRequestTrapM in the critical path diff --git a/pipelined/src/cache/subcachelineread.sv b/pipelined/src/cache/subcachelineread.sv index 2a3395ea5..fe9139659 100644 --- a/pipelined/src/cache/subcachelineread.sv +++ b/pipelined/src/cache/subcachelineread.sv @@ -31,10 +31,7 @@ `include "wally-config.vh" module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)( - input logic clk, - input logic reset, input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr, - input logic save, restore, input logic [LINELEN-1:0] ReadDataLine, output logic [WORDLEN-1:0] ReadDataWord); @@ -43,7 +40,6 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)( localparam PADLEN = WORDLEN-MUXINTERVAL; logic [LINELEN+(WORDLEN-MUXINTERVAL)-1:0] ReadDataLinePad; logic [WORDLEN-1:0] ReadDataLineSets [(LINELEN/MUXINTERVAL)-1:0]; - logic [WORDLEN-1:0] ReadDataWordRaw, ReadDataWordSaved; if (PADLEN > 0) begin logic [PADLEN-1:0] Pad; @@ -56,11 +52,5 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)( assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1: (index*MUXINTERVAL)]; end // variable input mux - // *** maybe remove REPLAY config later after deciding which way is best - assign ReadDataWordRaw = ReadDataLineSets[PAdr]; - if(!`REPLAY) begin - flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved); - mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved, - restore, ReadDataWord); - end else assign ReadDataWord = ReadDataWordRaw; + assign ReadDataWord = ReadDataLineSets[PAdr]; endmodule From 05484c4c054b93fe60bc17a2834683c626156d2c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 22 Jul 2022 23:36:27 -0500 Subject: [PATCH 099/138] signal name cleanup. --- pipelined/src/cache/cache.sv | 6 +++--- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index dbb69e464..d28697e21 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -38,7 +38,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [1:0] RW, input logic [1:0] Atomic, input logic FlushCache, - input logic InvalidateCacheM, + input logic InvalidateCache, input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, @@ -130,7 +130,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .FStore2, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, - .Invalidate(InvalidateCacheM)); + .Invalidate(InvalidateCache)); if(NUMWAYS > 1) begin:vict cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( .clk, .reset, .HitWay, .VictimWay, .RAdr, .LRUWriteEn); @@ -220,7 +220,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER .SetValid, .SelEvict, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelBusBuffer, - .InvalidateCache(InvalidateCacheM), + .InvalidateCache, .SRAMEnable, .LRUWriteEn); endmodule diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 5c2f799d0..af78d842d 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -236,7 +236,7 @@ module ifu ( .Atomic('0), .FlushCache('0), .NextAdr(PCNextFSpill[11:0]), .PAdr(PCPF), - .CacheCommitted(), .InvalidateCacheM(InvalidateICacheM)); + .CacheCommitted(), .InvalidateCache(InvalidateICacheM)); end else begin : passthrough assign {ICacheFetchLine, ICacheBusAdr, ICacheStallF, FinalInstrRawF} = '0; diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index e9f41e653..e6f632640 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -249,7 +249,7 @@ module lsu ( .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), .CacheBusWriteData(DCacheBusWriteData), .CacheFetchLine(DCacheFetchLine), - .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0)); + .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); end else begin : passthrough assign {ReadDataWordM, DCacheStallM, DCacheCommittedM, DCacheFetchLine, DCacheWriteLine} = '0; From cd688966374dc5d461dc29c0e77aaf1945393cb8 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 24 Jul 2022 00:22:43 -0500 Subject: [PATCH 100/138] Merged evict dirty clear with flush write back. --- pipelined/src/cache/cachefsm.sv | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 46f7825b6..7591a512e 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -95,8 +95,7 @@ module cachefsm STATE_FLUSH, STATE_FLUSH_CHECK, STATE_FLUSH_INCR, - STATE_FLUSH_WRITE_BACK, - STATE_FLUSH_CLEAR_DIRTY} statetype; + STATE_FLUSH_WRITE_BACK} statetype; (* mark_debug = "true" *) statetype CurrState, NextState; logic IgnoreRequest; @@ -149,11 +148,11 @@ module cachefsm else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; else NextState = STATE_FLUSH_CHECK; STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK; - STATE_FLUSH_WRITE_BACK: if(CacheBusAck) NextState = STATE_FLUSH_CLEAR_DIRTY; - else NextState = STATE_FLUSH_WRITE_BACK; - STATE_FLUSH_CLEAR_DIRTY: if(FlushFlag) NextState = STATE_READY; - else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; - else NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_WRITE_BACK: if(CacheBusAck) begin + if(FlushFlag) NextState = STATE_READY; + else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; + else NextState = STATE_FLUSH_CHECK; + end else NextState = STATE_FLUSH_WRITE_BACK; default: NextState = STATE_READY; endcase end @@ -169,27 +168,26 @@ module cachefsm (CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag)); + (CurrState == STATE_FLUSH_WRITE_BACK & ~(FlushFlag) & CacheBusAck); // write enables internal to cache assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0])); assign ClearValid = '0; assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY); + (CurrState == STATE_FLUSH_WRITE_BACK & CacheBusAck); assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_EVICT_DIRTY); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | - (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY); + (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK); assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag; assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & FlushWayAndNotAdrFlag) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayAndNotAdrFlag); + (CurrState == STATE_FLUSH_WRITE_BACK & FlushWayAndNotAdrFlag & CacheBusAck); assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushFlag)) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~FlushFlag); + (CurrState == STATE_FLUSH_WRITE_BACK & ~FlushFlag & CacheBusAck); assign FlushAdrCntRst = (CurrState == STATE_READY); assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls @@ -210,6 +208,5 @@ module cachefsm assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign SRAMEnable = (CurrState == STATE_READY & ~CPUBusy | CacheStall) | (CurrState != STATE_READY) | reset; - //assign SRAMEnable = 1; endmodule // cachefsm From f3cf46d633b940819358a7777f2d7a0884ed5fa8 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 24 Jul 2022 00:24:13 -0500 Subject: [PATCH 101/138] Added more i-cache signals to wave file. --- pipelined/regression/wave.do | 400 +++++++++++++++++------------------ 1 file changed, 196 insertions(+), 204 deletions(-) diff --git a/pipelined/regression/wave.do b/pipelined/regression/wave.do index eac613075..b0c55d279 100644 --- a/pipelined/regression/wave.do +++ b/pipelined/regression/wave.do @@ -5,39 +5,38 @@ add wave -noupdate /testbench/reset add wave -noupdate /testbench/reset_ext add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/dut/core/SATP_REGW -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM -add wave -noupdate -expand -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/ifu/IFUStallF -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/MDUStallD -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM +add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/ifu/IFUStallF +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/MDUStallD +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InterruptM +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW add wave -noupdate -group {instruction pipeline} /testbench/InstrFName add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD @@ -126,12 +125,12 @@ add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/if add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredClassNonCFIWrongE add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE -add wave -noupdate -expand -group PCS /testbench/dut/core/ifu/PCNextF -add wave -noupdate -expand -group PCS /testbench/dut/core/PCF -add wave -noupdate -expand -group PCS /testbench/dut/core/ifu/PCD -add wave -noupdate -expand -group PCS /testbench/dut/core/PCE -add wave -noupdate -expand -group PCS /testbench/dut/core/PCM -add wave -noupdate -expand -group PCS /testbench/PCW +add wave -noupdate -group PCS /testbench/dut/core/ifu/PCNextF +add wave -noupdate -group PCS /testbench/dut/core/PCF +add wave -noupdate -group PCS /testbench/dut/core/ifu/PCD +add wave -noupdate -group PCS /testbench/dut/core/PCE +add wave -noupdate -group PCS /testbench/dut/core/PCM +add wave -noupdate -group PCS /testbench/PCW add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F @@ -199,14 +198,14 @@ add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr -add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA +add wave -noupdate -group lsu -expand -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/BusStall +add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusRead +add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusWrite +add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAdr +add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAck +add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHRDATA +add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHWDATA add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid @@ -224,65 +223,65 @@ add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbe add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid -add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid +add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} @@ -305,22 +304,22 @@ add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM +add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord +add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine +add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine +add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr +add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck +add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode @@ -424,81 +423,81 @@ add wave -noupdate -group {debug trace} -expand -group wb /testbench/PCW add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PCNext2F add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PrivilegedNextPCM add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PrivilegedChangePCM -add wave -noupdate -expand -group ifu -color Gold /testbench/dut/core/ifu/bus/busdp/busfsm/BusCurrState -add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusRead -add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusAdr -add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusAck -add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUBusHRDATA -add wave -noupdate -expand -group ifu /testbench/dut/core/ifu/IFUTransComplete -add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF -add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState -add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0 -add wave -noupdate -expand -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SelSpillF -add wave -noupdate -expand -group ifu -expand -group icache -color Gold /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CurrState -add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/ITLBMissF -add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SelAdr -add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/PCNextF -add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/PCPF -add wave -noupdate -expand -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/icache/HitWay -add wave -noupdate -expand -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/ICacheStallF -add wave -noupdate -expand -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/FinalInstrRawF -add wave -noupdate -expand -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr -add wave -noupdate -expand -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck -add wave -noupdate -expand -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData -add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay -add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetDirtyWay -add wave -noupdate -expand -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetValidWay -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ByteMask} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ReadData} -add wave -noupdate -expand -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite -add wave -noupdate -expand -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF -add wave -noupdate -expand -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress +add wave -noupdate -group ifu -color Gold /testbench/dut/core/ifu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead +add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr +add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck +add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA +add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete +add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF +add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState +add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0 +add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SelSpillF +add wave -noupdate -group ifu -expand -group icache -color Gold /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CurrState +add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/ITLBMissF +add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SelAdr +add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/PCNextF +add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/PCPF +add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/icache/HitWay +add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/ICacheStallF +add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/FinalInstrRawF +add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr +add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck +add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData +add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay +add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetDirtyWay +add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetValidWay +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ByteMask} +add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ReadData} +add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite +add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF +add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress add wave -noupdate -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]} add wave -noupdate -group {Performance Counters} -label MINSTRET -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]} add wave -noupdate -group {Performance Counters} -label {LOAD STORE HAZARD} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]} @@ -513,15 +512,8 @@ add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {I add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {ICACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} -add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/NextPTE -add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/UpdatePTE -add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/FinalByteMask} -add wave -noupdate {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/SelectedWriteWordEn} -add wave -noupdate /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushAdrFlag -add wave -noupdate /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushWayFlag -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/InvalidateCacheM TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {989221 ns} 1} {{Cursor 3} {999815 ns} 1} {{Cursor 4} {993418 ns} 0} +WaveRestoreCursors {{Cursor 2} {989221 ns} 1} {{Cursor 3} {999815 ns} 1} {{Cursor 4} {335041 ns} 0} quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 314 @@ -537,4 +529,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {993352 ns} {993510 ns} +WaveRestoreZoom {334923 ns} {335159 ns} From 69d520a7eb2ac820baf7bb3b29374b5f5e5990e0 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 24 Jul 2022 00:34:11 -0500 Subject: [PATCH 102/138] Removed replay from the config files. --- pipelined/config/buildroot/wally-config.vh | 1 - pipelined/config/fpga/wally-config.vh | 1 - pipelined/config/rv32e/wally-config.vh | 1 - pipelined/config/rv32gc/wally-config.vh | 1 - pipelined/config/rv32i/wally-config.vh | 2 - pipelined/config/rv32ic/wally-config.vh | 2 - pipelined/config/rv64BP/wally-config.vh | 2 - pipelined/config/rv64fp/wally-config.vh | 1 - pipelined/config/rv64fpquad/wally-config.vh | 1 - pipelined/config/rv64gc/wally-config.vh | 1 - pipelined/config/rv64i/wally-config.vh | 1 - pipelined/config/rv64ic/wally-config.vh | 1 - pipelined/regression/wally-pipelined.do | 2 +- pipelined/src/cache/cachefsm.sv | 45 +++++++++++---------- 14 files changed, 24 insertions(+), 38 deletions(-) diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index f11b71c0a..383cab5b1 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -130,5 +130,4 @@ `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 1 diff --git a/pipelined/config/fpga/wally-config.vh b/pipelined/config/fpga/wally-config.vh index 12e9fb363..2d349c8d9 100644 --- a/pipelined/config/fpga/wally-config.vh +++ b/pipelined/config/fpga/wally-config.vh @@ -141,5 +141,4 @@ `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 1 diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 7d083f3b5..4160957b6 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -135,5 +135,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index 70124d551..f15f15a33 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -133,5 +133,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh index d44072d6a..001157a13 100644 --- a/pipelined/config/rv32i/wally-config.vh +++ b/pipelined/config/rv32i/wally-config.vh @@ -135,6 +135,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 - -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index e42fd3100..4fcffd73c 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -133,6 +133,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 - -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index 3bc745eb1..53ca026ce 100644 --- a/pipelined/config/rv64BP/wally-config.vh +++ b/pipelined/config/rv64BP/wally-config.vh @@ -136,6 +136,4 @@ `define TESTSBP 1 `define BPRED_SIZE 10 - -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index cc8d1b2b8..bf0831d14 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -137,5 +137,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64fpquad/wally-config.vh b/pipelined/config/rv64fpquad/wally-config.vh index 0dee000e2..eb8b02662 100644 --- a/pipelined/config/rv64fpquad/wally-config.vh +++ b/pipelined/config/rv64fpquad/wally-config.vh @@ -136,5 +136,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 9afa1a679..19750d6be 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -136,5 +136,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh index 67ca51a7a..99c70cd68 100644 --- a/pipelined/config/rv64i/wally-config.vh +++ b/pipelined/config/rv64i/wally-config.vh @@ -136,5 +136,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh index fca1f2609..a31a555a5 100644 --- a/pipelined/config/rv64ic/wally-config.vh +++ b/pipelined/config/rv64ic/wally-config.vh @@ -136,5 +136,4 @@ `define TESTSBP 0 `define BPRED_SIZE 10 -`define REPLAY 0 `define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/regression/wally-pipelined.do b/pipelined/regression/wally-pipelined.do index 6ef601f58..381311397 100644 --- a/pipelined/regression/wally-pipelined.do +++ b/pipelined/regression/wally-pipelined.do @@ -38,7 +38,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829 -fatal 7 #-- Run the Simulation - run -all + #run -all add log -recursive /* do linux-wave.do run -all diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 7591a512e..9d0778373 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -131,29 +131,31 @@ module cachefsm always_comb begin NextState = STATE_READY; case (CurrState) - STATE_READY: if(IgnoreRequest | InvalidateCache) NextState = STATE_READY; - else if(DoFlush) NextState = STATE_FLUSH; - else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // fetch first, then eviction is necessary. see delay in lru read/write path. - else NextState = STATE_READY; - STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; + STATE_READY: if(IgnoreRequest | InvalidateCache) NextState = STATE_READY; + else if(DoFlush) NextState = STATE_FLUSH; + // Delayed LRU update. Cannot check if victim line is dirty on this cycle. + // To optimize do the fetch first, then eviction if necessary. + else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; + else NextState = STATE_READY; + STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; - else NextState = STATE_MISS_FETCH_WDV; - STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; // cpu_busy not needed. load misses have the property of reading from the bus buffer rather than sram. - STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; - else NextState = STATE_MISS_EVICT_DIRTY; - STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. - STATE_FLUSH: NextState = STATE_FLUSH_CHECK; - STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; - else if(FlushFlag) NextState = STATE_READY; - else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; - else NextState = STATE_FLUSH_CHECK; - STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK; + else NextState = STATE_MISS_FETCH_WDV; + STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; + STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; + else NextState = STATE_MISS_EVICT_DIRTY; + STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. + STATE_FLUSH: NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; + else if(FlushFlag) NextState = STATE_READY; + else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; + else NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK; STATE_FLUSH_WRITE_BACK: if(CacheBusAck) begin - if(FlushFlag) NextState = STATE_READY; - else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; - else NextState = STATE_FLUSH_CHECK; - end else NextState = STATE_FLUSH_WRITE_BACK; - default: NextState = STATE_READY; + if(FlushFlag) NextState = STATE_READY; + else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; + else NextState = STATE_FLUSH_CHECK; + end else NextState = STATE_FLUSH_WRITE_BACK; + default: NextState = STATE_READY; endcase end @@ -198,7 +200,6 @@ module cachefsm assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss. // use the raw requests as we don't want IgnoreRequestTrapM in the critical path (CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed - (CurrState == STATE_READY & (CacheRW[1] & CacheHit) & (CPUBusy & `REPLAY)) | (CurrState == STATE_READY & (DoAnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_EVICT_DIRTY) | From 719b00e338bc1d7de3043bee53dc502c60519698 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 24 Jul 2022 01:20:29 -0500 Subject: [PATCH 103/138] Overlapped read fetch line end with eviction write line start. I'm a bit concerned this is not well tested. --- pipelined/regression/linux-wave.do | 474 +++++++++++++---------------- pipelined/regression/wave.do | 346 ++++++++++----------- pipelined/src/cache/cachefsm.sv | 14 +- pipelined/src/lsu/busfsm.sv | 14 +- 4 files changed, 400 insertions(+), 448 deletions(-) diff --git a/pipelined/regression/linux-wave.do b/pipelined/regression/linux-wave.do index 62de1e0d2..10f98251e 100644 --- a/pipelined/regression/linux-wave.do +++ b/pipelined/regression/linux-wave.do @@ -11,11 +11,9 @@ add wave -noupdate -expand -group Testbench /testbench/interruptEpcVal add wave -noupdate -expand -group Testbench /testbench/interruptTVal add wave -noupdate -expand -group Testbench /testbench/interruptDesc add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWritePendingDEM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/ExceptionM @@ -56,26 +54,25 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE add wave -noupdate -group {Execution Stage} /testbench/ExpectedPCE -add wave -noupdate -group {Execution Stage} /testbench/MepcExpected add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE add wave -noupdate -group {Execution Stage} /testbench/InstrEName add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE add wave -noupdate -group {Execution Stage} /testbench/textE add wave -noupdate -group {Execution Stage} -color {Cornflower Blue} /testbench/FunctionName/FunctionName -add wave -noupdate -group {Memory Stage} /testbench/checkInstrM -add wave -noupdate -group {Memory Stage} /testbench/dut/core/PCM -add wave -noupdate -group {Memory Stage} /testbench/ExpectedPCM -add wave -noupdate -group {Memory Stage} /testbench/dut/core/InstrM -add wave -noupdate -group {Memory Stage} /testbench/InstrMName -add wave -noupdate -group {Memory Stage} /testbench/textM -add wave -noupdate -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM -add wave -noupdate -group {WriteBack stage} /testbench/checkInstrW -add wave -noupdate -group {WriteBack stage} /testbench/InstrValidW -add wave -noupdate -group {WriteBack stage} /testbench/PCW -add wave -noupdate -group {WriteBack stage} /testbench/ExpectedPCW -add wave -noupdate -group {WriteBack stage} /testbench/InstrW -add wave -noupdate -group {WriteBack stage} /testbench/InstrWName -add wave -noupdate -group {WriteBack stage} /testbench/textW +add wave -noupdate -expand -group {Memory Stage} /testbench/checkInstrM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM +add wave -noupdate -expand -group {Memory Stage} /testbench/ExpectedPCM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM +add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName +add wave -noupdate -expand -group {Memory Stage} /testbench/textM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM +add wave -noupdate -expand -group {WriteBack stage} /testbench/checkInstrW +add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrValidW +add wave -noupdate -expand -group {WriteBack stage} /testbench/PCW +add wave -noupdate -expand -group {WriteBack stage} /testbench/ExpectedPCW +add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrW +add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrWName +add wave -noupdate -expand -group {WriteBack stage} /testbench/textW add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -196,203 +193,202 @@ add wave -noupdate -group ifu -expand -group icache -expand -group memory /testb add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/ITLBMissF add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress -add wave -noupdate -group lsu /testbench/dut/core/lsu/IEUAdrM -add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUPAdrM -add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState -add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW -add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall -add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr -add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA -add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData[69]} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetValid -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM -add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM -add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_D -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_A -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_U -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_X -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_W -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_R -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_V -add wave -noupdate -group lsu -group dtlb -expand -group Status -color Maroon /testbench/dut/core/lsu/dmmu/dmmu/DAPageFault -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/ImproperPrivilege -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/UpperBitsUnequalPageFault -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Misaligned -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidRead -add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidWrite -add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault -add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM -add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr -add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal -add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM -add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState -add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF -add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE -add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr -add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE -add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissF -add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissOrDAFaultF -add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF -add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/IEUAdrM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUPAdrM +add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr +add wave -noupdate -expand -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/BusStall +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA +add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA +add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetValid +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM +add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_D +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_A +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_U +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_X +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_W +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_R +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_V +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status -color Maroon /testbench/dut/core/lsu/dmmu/dmmu/DAPageFault +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/ImproperPrivilege +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/UpperBitsUnequalPageFault +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Misaligned +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidRead +add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidWrite +add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault +add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr +add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal +add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissF +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissOrDAFaultF +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM add wave -noupdate -group AHB -color Gold /testbench/dut/core/ebu/BusState add wave -noupdate -group AHB /testbench/dut/core/ebu/NextBusState add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/AtomicMaskedM @@ -424,55 +420,14 @@ add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite add wave -noupdate -group itlb /testbench/dut/core/ifu/ITLBMissF add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PMAInstrAccessFaultF -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC -add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO -add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM -add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HCLK -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESETn -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HSELUART -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HADDR -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWRITE -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWDATA -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADUART -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESPUART -add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADYUART add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/SIN add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DSRb add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DCDb @@ -486,11 +441,6 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/INTR add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/TXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RXRDYb -add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HCLK -add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART -add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR -add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE -add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA add wave -noupdate -group {debug trace} -expand -group mem -color Yellow /testbench/dut/core/FlushW add wave -noupdate -group {debug trace} -expand -group mem /testbench/checkInstrM add wave -noupdate -group {debug trace} -expand -group mem /testbench/dut/core/PCM @@ -517,7 +467,7 @@ add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {I add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {2240751 ns} 0} +WaveRestoreCursors {{Cursor 4} {87475 ns} 0} quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 314 @@ -533,4 +483,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {2240730 ns} {2240764 ns} +WaveRestoreZoom {87386 ns} {87672 ns} diff --git a/pipelined/regression/wave.do b/pipelined/regression/wave.do index b0c55d279..a9a9feee9 100644 --- a/pipelined/regression/wave.do +++ b/pipelined/regression/wave.do @@ -190,177 +190,177 @@ add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED -add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState -add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW -add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall -add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr -add wave -noupdate -group lsu -expand -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState -add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/BusStall -add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusRead -add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusWrite -add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAdr -add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAck -add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHRDATA -add wave -noupdate -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHWDATA -add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} -add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} -add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay -add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM -add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM -add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit -add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine -add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine -add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr -add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData -add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck -add wave -noupdate -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM -add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE -add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF -add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM +add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr +add wave -noupdate -expand -group lsu -expand -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/BusStall +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusRead +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusWrite +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAdr +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAck +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHRDATA +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHWDATA +add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM +add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE +add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF +add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intClaim @@ -513,7 +513,7 @@ add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {I add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {989221 ns} 1} {{Cursor 3} {999815 ns} 1} {{Cursor 4} {335041 ns} 0} +WaveRestoreCursors {{Cursor 2} {989221 ns} 1} {{Cursor 3} {999815 ns} 1} {{Cursor 4} {311315 ns} 0} quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 314 @@ -529,4 +529,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {334923 ns} {335159 ns} +WaveRestoreZoom {311178 ns} {311464 ns} diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 9d0778373..a1c785dc7 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -88,7 +88,6 @@ module cachefsm typedef enum logic [3:0] {STATE_READY, // hit states // miss states STATE_MISS_FETCH_WDV, - STATE_MISS_EVICT_DIRTY_START, STATE_MISS_EVICT_DIRTY, STATE_MISS_WRITE_CACHE_LINE, // flush cache @@ -138,12 +137,12 @@ module cachefsm else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; else NextState = STATE_READY; STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; - else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY_START; + else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; else NextState = STATE_MISS_FETCH_WDV; STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_EVICT_DIRTY; - STATE_MISS_EVICT_DIRTY_START: NextState = STATE_MISS_EVICT_DIRTY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. + // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: NextState = STATE_FLUSH_CHECK; STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; else if(FlushFlag) NextState = STATE_READY; @@ -163,13 +162,11 @@ module cachefsm assign CacheCommitted = CurrState != STATE_READY; assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | - (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) | (CurrState == STATE_FLUSH_INCR) | - (CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_WRITE_BACK & ~(FlushFlag) & CacheBusAck); // write enables internal to cache assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; @@ -181,8 +178,8 @@ module cachefsm assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls - assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY_START) | - (CurrState == STATE_MISS_EVICT_DIRTY); + assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY) | + (CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK); assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag; @@ -194,7 +191,7 @@ module cachefsm assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss); - assign CacheWriteLine = (CurrState == STATE_MISS_EVICT_DIRTY_START) | + assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); // **** can this be simplified? assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss. @@ -203,7 +200,6 @@ module cachefsm (CurrState == STATE_READY & (DoAnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_EVICT_DIRTY) | - (CurrState == STATE_MISS_EVICT_DIRTY_START) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | resetDelay; diff --git a/pipelined/src/lsu/busfsm.sv b/pipelined/src/lsu/busfsm.sv index 225f2de6d..75c4d006b 100644 --- a/pipelined/src/lsu/busfsm.sv +++ b/pipelined/src/lsu/busfsm.sv @@ -126,10 +126,16 @@ module busfsm #(parameter integer WordCountThreshold, else BusNextState = STATE_BUS_READY; STATE_BUS_CPU_BUSY: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; else BusNextState = STATE_BUS_READY; - STATE_BUS_FETCH: if (WordCountFlag & LSUBusAck) BusNextState = STATE_BUS_READY; - else BusNextState = STATE_BUS_FETCH; - STATE_BUS_WRITE: if(WordCountFlag & LSUBusAck) BusNextState = STATE_BUS_READY; - else BusNextState = STATE_BUS_WRITE; + STATE_BUS_FETCH: if (WordCountFlag & LSUBusAck) begin + if (DCacheFetchLine) BusNextState = STATE_BUS_FETCH; + else if (DCacheWriteLine) BusNextState = STATE_BUS_WRITE; + else BusNextState = STATE_BUS_READY; + end else BusNextState = STATE_BUS_FETCH; + STATE_BUS_WRITE: if(WordCountFlag & LSUBusAck) begin + if (DCacheFetchLine) BusNextState = STATE_BUS_FETCH; + else if (DCacheWriteLine) BusNextState = STATE_BUS_WRITE; + else BusNextState = STATE_BUS_READY; + end else BusNextState = STATE_BUS_WRITE; default: BusNextState = STATE_BUS_READY; endcase end From 40e7cda84a7a00f0c720c4e74c8dbb3c2844adf6 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 24 Jul 2022 15:40:52 -0500 Subject: [PATCH 104/138] Don't use this commit yet. Untested. --- pipelined/src/cache/cache.sv | 4 ++-- pipelined/src/lsu/subwordwrite.sv | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index d28697e21..e168b2c15 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -42,8 +42,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, - input logic [WORDLEN-1:0] FinalWriteData, - input logic FStore2, + input logic [WORDLEN-1:0] FinalWriteData, + input logic FStore2, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv index 43c50e882..b93062365 100644 --- a/pipelined/src/lsu/subwordwrite.sv +++ b/pipelined/src/lsu/subwordwrite.sv @@ -39,7 +39,10 @@ module subwordwrite ( ); // Compute byte masks - swbytemask swbytemask(.Size(LSUFunct3M[1:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM)); + //swbytemask swbytemask(.Size(LSUFunct3M[1:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM)); + // *** fix me. + swbytemaskword #(.WORDLEN(`XLEN)) + swbytemaskword (.Size(LSUFunct3M[2:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM)); // Replicate data for subword writes if (`XLEN == 64) begin:sww From 62be9963d8e12869636d62d8adc27e157194c070 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 25 Jul 2022 01:50:38 +0000 Subject: [PATCH 105/138] Fixed synthesis by removing wally-config.vh at level above hdl directory --- synthDC/scripts/synth.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 9b72849f8..ed43a1ab3 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -27,7 +27,7 @@ set maxopt $::env(MAXOPT) set drive $::env(DRIVE) eval file copy -force ${cfg} {$outputDir/hdl/} -eval file copy -force ${cfg} $outputDir +#eval file copy -force ${cfg} $outputDir eval file copy -force [glob ${hdl_src}/../config/shared/*.vh] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/*/*.sv] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/*/flop/*.sv] {$outputDir/hdl/} From 7f7b3359b0ba2fb69e81843741420c4e49193bb0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 25 Jul 2022 20:50:38 +0000 Subject: [PATCH 106/138] Cleaning up Makefiles for riscof to run each set of tests individually and eliminate warnings --- pipelined/regression/Makefile | 3 ++- synthDC/.synopsys_dc.setup | 2 +- tests/riscof/Makefile | 28 +++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 6174464e4..b34af3208 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -24,8 +24,9 @@ clean: riscoftests: # Builds riscv-arch-test 64 and 32-bit versions and builds wally-riscv-arch-test 64 and 32-bit versions - make -C ../../tests/riscof/ make -C ../../tests/riscof/ XLEN=32 + make -C ../../tests/riscof/ XLEN=32 build_rv32e + make -C ../../tests/riscof/ XLEN=64 memfiles: make -f makefile-memfile wally-sim-files --jobs diff --git a/synthDC/.synopsys_dc.setup b/synthDC/.synopsys_dc.setup index ddb62533e..2490e5647 100755 --- a/synthDC/.synopsys_dc.setup +++ b/synthDC/.synopsys_dc.setup @@ -51,7 +51,7 @@ lappend search_path ./mapped # Set up User Information set company "Oklahoma State University" -set user "James E. Stine" +set user "Prof. James E. Stine" # Alias alias ra report_area diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index e343d8354..a97a0912b 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,7 +8,8 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) XLEN ?= 64 -all: root build_rv32e build_wally build_arch +#all: root build_rv32e build_wally build_arch +all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 root: mkdir -p $(work_dir) @@ -23,6 +24,31 @@ fsd_fld_tempfix: find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} +arch32: + riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser + rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed" + +arch64: + riscof run --work-dir=$(work_dir) --config=config64.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser + rsync -a $(work_dir)/rv64i_m/ $(arch_workdir)/rv64i_m/ || echo "error suppressed" + +wally32: + riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run + rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" +# rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" + +wally64: + riscof run --work-dir=$(work_dir) --config=config64.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run + rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv64i_m/ || echo "error suppressed" +# rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv64i_m/ || echo "error suppressed" + +wally32e: + sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini + riscof run --work-dir=$(work_dir) --config=config32e.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run + rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" + rsync -a $(work_dir)/rv32e_unratified/ $(wally_workdir)/rv32e_unratified/ || echo "error suppressed" + + build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser # rm -rf $(arch_workdir)/rv$(XLEN)i_m From 416f5edfe03dd2f5b235095c15c7813a1ba36b9d Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 25 Jul 2022 21:15:56 +0000 Subject: [PATCH 107/138] More riscof makefile tuning --- pipelined/regression/Makefile | 7 ++++--- tests/riscof/Makefile | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index b34af3208..f15013280 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -24,9 +24,10 @@ clean: riscoftests: # Builds riscv-arch-test 64 and 32-bit versions and builds wally-riscv-arch-test 64 and 32-bit versions - make -C ../../tests/riscof/ XLEN=32 - make -C ../../tests/riscof/ XLEN=32 build_rv32e - make -C ../../tests/riscof/ XLEN=64 + make -C ../../tests/riscof/ +# make -C ../../tests/riscof/ XLEN=32 +# make -C ../../tests/riscof/ XLEN=32 build_rv32e +# make -C ../../tests/riscof/ XLEN=64 memfiles: make -f makefile-memfile wally-sim-files --jobs diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index a97a0912b..81e36b037 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -6,7 +6,7 @@ arch_workdir = $(work)/riscv-arch-test wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) -XLEN ?= 64 +#XLEN ?= 64 #all: root build_rv32e build_wally build_arch all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 @@ -16,7 +16,9 @@ root: mkdir -p $(work) mkdir -p $(arch_workdir) mkdir -p $(wally_workdir) - sed 's,{0},$(current_dir),g;s,{1},$(XLEN)$(if $(findstring 64,$(XLEN)),gc,imc),g' config.ini > config$(XLEN).ini + sed 's,{0},$(current_dir),g;s,{1},32imc,g' config.ini > config32.ini + sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini + sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini fsd_fld_tempfix: # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests @@ -43,7 +45,6 @@ wally64: # rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv64i_m/ || echo "error suppressed" wally32e: - sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini riscof run --work-dir=$(work_dir) --config=config32e.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" rsync -a $(work_dir)/rv32e_unratified/ $(wally_workdir)/rv32e_unratified/ || echo "error suppressed" From c6a58eb5b6fa82b426336a9e05a3b9c366790c51 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 25 Jul 2022 16:23:10 -0700 Subject: [PATCH 108/138] Tests making successfully except for rv32gc_arch32f, which has FLEN=64 and tries using fld/fsd --- pipelined/testbench/tests.vh | 306 +++++++++++++++++------------------ tests/riscof/Makefile | 31 +--- 2 files changed, 157 insertions(+), 180 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 9d4f78547..4154c6366 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1054,33 +1054,33 @@ string imperas32f[] = '{ string arch64d[] = '{ `RISCVARCHTEST, - "rv64i_m/D/src/fadd.d_b10-01.S", - "rv64i_m/D/src/fadd.d_b1-01.S", - "rv64i_m/D/src/fadd.d_b11-01.S", - "rv64i_m/D/src/fadd.d_b12-01.S", - "rv64i_m/D/src/fadd.d_b13-01.S", - "rv64i_m/D/src/fadd.d_b2-01.S", - "rv64i_m/D/src/fadd.d_b3-01.S", - "rv64i_m/D/src/fadd.d_b4-01.S", - "rv64i_m/D/src/fadd.d_b5-01.S", - "rv64i_m/D/src/fadd.d_b7-01.S", - "rv64i_m/D/src/fadd.d_b8-01.S", - "rv64i_m/D/src/fclass.d_b1-01.S", + "rv32i_m/D/src/fadd.d_b10-01.S", + "rv32i_m/D/src/fadd.d_b1-01.S", + "rv32i_m/D/src/fadd.d_b11-01.S", + "rv32i_m/D/src/fadd.d_b12-01.S", + "rv32i_m/D/src/fadd.d_b13-01.S", + "rv32i_m/D/src/fadd.d_b2-01.S", + "rv32i_m/D/src/fadd.d_b3-01.S", + "rv32i_m/D/src/fadd.d_b4-01.S", + "rv32i_m/D/src/fadd.d_b5-01.S", + "rv32i_m/D/src/fadd.d_b7-01.S", + "rv32i_m/D/src/fadd.d_b8-01.S", + "rv32i_m/D/src/fclass.d_b1-01.S", "rv64i_m/D/src/fcvt.d.l_b25-01.S", "rv64i_m/D/src/fcvt.d.l_b26-01.S", "rv64i_m/D/src/fcvt.d.lu_b25-01.S", "rv64i_m/D/src/fcvt.d.lu_b26-01.S", - "rv64i_m/D/src/fcvt.d.s_b1-01.S", - "rv64i_m/D/src/fcvt.d.s_b22-01.S", - "rv64i_m/D/src/fcvt.d.s_b23-01.S", - "rv64i_m/D/src/fcvt.d.s_b24-01.S", - "rv64i_m/D/src/fcvt.d.s_b27-01.S", - "rv64i_m/D/src/fcvt.d.s_b28-01.S", - "rv64i_m/D/src/fcvt.d.s_b29-01.S", - "rv64i_m/D/src/fcvt.d.w_b25-01.S", - "rv64i_m/D/src/fcvt.d.w_b26-01.S", - "rv64i_m/D/src/fcvt.d.wu_b25-01.S", - "rv64i_m/D/src/fcvt.d.wu_b26-01.S", + "rv32i_m/D/src/fcvt.d.s_b1-01.S", + "rv32i_m/D/src/fcvt.d.s_b22-01.S", + "rv32i_m/D/src/fcvt.d.s_b23-01.S", + "rv32i_m/D/src/fcvt.d.s_b24-01.S", + "rv32i_m/D/src/fcvt.d.s_b27-01.S", + "rv32i_m/D/src/fcvt.d.s_b28-01.S", + "rv32i_m/D/src/fcvt.d.s_b29-01.S", + "rv32i_m/D/src/fcvt.d.w_b25-01.S", + "rv32i_m/D/src/fcvt.d.w_b26-01.S", + "rv32i_m/D/src/fcvt.d.wu_b25-01.S", + "rv32i_m/D/src/fcvt.d.wu_b26-01.S", "rv64i_m/D/src/fcvt.l.d_b1-01.S", "rv64i_m/D/src/fcvt.l.d_b22-01.S", "rv64i_m/D/src/fcvt.l.d_b23-01.S", @@ -1095,135 +1095,135 @@ string imperas32f[] = '{ "rv64i_m/D/src/fcvt.lu.d_b27-01.S", "rv64i_m/D/src/fcvt.lu.d_b28-01.S", "rv64i_m/D/src/fcvt.lu.d_b29-01.S", - "rv64i_m/D/src/fcvt.s.d_b1-01.S", - "rv64i_m/D/src/fcvt.s.d_b22-01.S", - "rv64i_m/D/src/fcvt.s.d_b23-01.S", - "rv64i_m/D/src/fcvt.s.d_b24-01.S", - "rv64i_m/D/src/fcvt.s.d_b27-01.S", - "rv64i_m/D/src/fcvt.s.d_b28-01.S", - "rv64i_m/D/src/fcvt.s.d_b29-01.S", - "rv64i_m/D/src/fcvt.w.d_b1-01.S", - "rv64i_m/D/src/fcvt.w.d_b22-01.S", - "rv64i_m/D/src/fcvt.w.d_b23-01.S", - "rv64i_m/D/src/fcvt.w.d_b24-01.S", - "rv64i_m/D/src/fcvt.w.d_b27-01.S", - "rv64i_m/D/src/fcvt.w.d_b28-01.S", - "rv64i_m/D/src/fcvt.w.d_b29-01.S", - "rv64i_m/D/src/fcvt.wu.d_b1-01.S", - "rv64i_m/D/src/fcvt.wu.d_b22-01.S", - "rv64i_m/D/src/fcvt.wu.d_b23-01.S", - "rv64i_m/D/src/fcvt.wu.d_b24-01.S", - "rv64i_m/D/src/fcvt.wu.d_b27-01.S", - "rv64i_m/D/src/fcvt.wu.d_b28-01.S", - "rv64i_m/D/src/fcvt.wu.d_b29-01.S", - "rv64i_m/D/src/fdiv.d_b1-01.S", - "rv64i_m/D/src/fdiv.d_b20-01.S", - "rv64i_m/D/src/fdiv.d_b2-01.S", - "rv64i_m/D/src/fdiv.d_b21-01.S", - "rv64i_m/D/src/fdiv.d_b3-01.S", - "rv64i_m/D/src/fdiv.d_b4-01.S", - "rv64i_m/D/src/fdiv.d_b5-01.S", - "rv64i_m/D/src/fdiv.d_b6-01.S", - "rv64i_m/D/src/fdiv.d_b7-01.S", - "rv64i_m/D/src/fdiv.d_b8-01.S", - "rv64i_m/D/src/fdiv.d_b9-01.S", - "rv64i_m/D/src/feq.d_b1-01.S", - "rv64i_m/D/src/feq.d_b19-01.S", - "rv64i_m/D/src/fle.d_b1-01.S", - "rv64i_m/D/src/fle.d_b19-01.S", - "rv64i_m/D/src/flt.d_b1-01.S", - "rv64i_m/D/src/flt.d_b19-01.S", - // "rv64i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back - // "rv64i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 - "rv64i_m/D/src/fmadd.d_b14-01.S", - "rv64i_m/D/src/fmadd.d_b16-01.S", - "rv64i_m/D/src/fmadd.d_b17-01.S", - "rv64i_m/D/src/fmadd.d_b18-01.S", - "rv64i_m/D/src/fmadd.d_b2-01.S", - "rv64i_m/D/src/fmadd.d_b3-01.S", - "rv64i_m/D/src/fmadd.d_b4-01.S", - "rv64i_m/D/src/fmadd.d_b5-01.S", - "rv64i_m/D/src/fmadd.d_b6-01.S", - "rv64i_m/D/src/fmadd.d_b7-01.S", - "rv64i_m/D/src/fmadd.d_b8-01.S", - "rv64i_m/D/src/fmax.d_b1-01.S", - "rv64i_m/D/src/fmax.d_b19-01.S", - "rv64i_m/D/src/fmin.d_b1-01.S", - "rv64i_m/D/src/fmin.d_b19-01.S", - "rv64i_m/D/src/fmsub.d_b14-01.S", - "rv64i_m/D/src/fmsub.d_b16-01.S", - "rv64i_m/D/src/fmsub.d_b17-01.S", - "rv64i_m/D/src/fmsub.d_b18-01.S", - "rv64i_m/D/src/fmsub.d_b2-01.S", - "rv64i_m/D/src/fmsub.d_b3-01.S", - "rv64i_m/D/src/fmsub.d_b4-01.S", - "rv64i_m/D/src/fmsub.d_b5-01.S", - "rv64i_m/D/src/fmsub.d_b6-01.S", - "rv64i_m/D/src/fmsub.d_b7-01.S", - "rv64i_m/D/src/fmsub.d_b8-01.S", - "rv64i_m/D/src/fmul.d_b1-01.S", - "rv64i_m/D/src/fmul.d_b2-01.S", - "rv64i_m/D/src/fmul.d_b3-01.S", - "rv64i_m/D/src/fmul.d_b4-01.S", - "rv64i_m/D/src/fmul.d_b5-01.S", - "rv64i_m/D/src/fmul.d_b6-01.S", - "rv64i_m/D/src/fmul.d_b7-01.S", - "rv64i_m/D/src/fmul.d_b8-01.S", - "rv64i_m/D/src/fmul.d_b9-01.S", - "rv64i_m/D/src/fmv.d.x_b25-01.S", - "rv64i_m/D/src/fmv.d.x_b26-01.S", - "rv64i_m/D/src/fmv.x.d_b1-01.S", - "rv64i_m/D/src/fmv.x.d_b22-01.S", - "rv64i_m/D/src/fmv.x.d_b23-01.S", - "rv64i_m/D/src/fmv.x.d_b24-01.S", - "rv64i_m/D/src/fmv.x.d_b27-01.S", - "rv64i_m/D/src/fmv.x.d_b28-01.S", - "rv64i_m/D/src/fmv.x.d_b29-01.S", - "rv64i_m/D/src/fnmadd.d_b14-01.S", - "rv64i_m/D/src/fnmadd.d_b16-01.S", - "rv64i_m/D/src/fnmadd.d_b17-01.S", - "rv64i_m/D/src/fnmadd.d_b18-01.S", - "rv64i_m/D/src/fnmadd.d_b2-01.S", - "rv64i_m/D/src/fnmadd.d_b3-01.S", - "rv64i_m/D/src/fnmadd.d_b4-01.S", - "rv64i_m/D/src/fnmadd.d_b5-01.S", - "rv64i_m/D/src/fnmadd.d_b6-01.S", - "rv64i_m/D/src/fnmadd.d_b7-01.S", - "rv64i_m/D/src/fnmadd.d_b8-01.S", - "rv64i_m/D/src/fnmsub.d_b14-01.S", - "rv64i_m/D/src/fnmsub.d_b16-01.S", - "rv64i_m/D/src/fnmsub.d_b17-01.S", - "rv64i_m/D/src/fnmsub.d_b18-01.S", - "rv64i_m/D/src/fnmsub.d_b2-01.S", - "rv64i_m/D/src/fnmsub.d_b3-01.S", - "rv64i_m/D/src/fnmsub.d_b4-01.S", - "rv64i_m/D/src/fnmsub.d_b5-01.S", - "rv64i_m/D/src/fnmsub.d_b6-01.S", - "rv64i_m/D/src/fnmsub.d_b7-01.S", - "rv64i_m/D/src/fnmsub.d_b8-01.S", - "rv64i_m/D/src/fsgnj.d_b1-01.S", - "rv64i_m/D/src/fsgnjn.d_b1-01.S", - "rv64i_m/D/src/fsgnjx.d_b1-01.S", - // "rv64i_m/D/src/fsqrt.d_b1-01.S", - // "rv64i_m/D/src/fsqrt.d_b20-01.S", - // "rv64i_m/D/src/fsqrt.d_b2-01.S", - // "rv64i_m/D/src/fsqrt.d_b3-01.S", - // "rv64i_m/D/src/fsqrt.d_b4-01.S", - // "rv64i_m/D/src/fsqrt.d_b5-01.S", - // "rv64i_m/D/src/fsqrt.d_b7-01.S", - // "rv64i_m/D/src/fsqrt.d_b8-01.S", - // "rv64i_m/D/src/fsqrt.d_b9-01.S", - "rv64i_m/D/src/fssub.d_b10-01.S", - "rv64i_m/D/src/fssub.d_b1-01.S", - "rv64i_m/D/src/fssub.d_b11-01.S", - "rv64i_m/D/src/fssub.d_b12-01.S", - "rv64i_m/D/src/fssub.d_b13-01.S", - "rv64i_m/D/src/fssub.d_b2-01.S", - "rv64i_m/D/src/fssub.d_b3-01.S", - "rv64i_m/D/src/fssub.d_b4-01.S", - "rv64i_m/D/src/fssub.d_b5-01.S", - "rv64i_m/D/src/fssub.d_b7-01.S", - "rv64i_m/D/src/fssub.d_b8-01.S" + "rv32i_m/D/src/fcvt.s.d_b1-01.S", + "rv32i_m/D/src/fcvt.s.d_b22-01.S", + "rv32i_m/D/src/fcvt.s.d_b23-01.S", + "rv32i_m/D/src/fcvt.s.d_b24-01.S", + "rv32i_m/D/src/fcvt.s.d_b27-01.S", + "rv32i_m/D/src/fcvt.s.d_b28-01.S", + "rv32i_m/D/src/fcvt.s.d_b29-01.S", + "rv32i_m/D/src/fcvt.w.d_b1-01.S", + "rv32i_m/D/src/fcvt.w.d_b22-01.S", + "rv32i_m/D/src/fcvt.w.d_b23-01.S", + "rv32i_m/D/src/fcvt.w.d_b24-01.S", + "rv32i_m/D/src/fcvt.w.d_b27-01.S", + "rv32i_m/D/src/fcvt.w.d_b28-01.S", + "rv32i_m/D/src/fcvt.w.d_b29-01.S", + "rv32i_m/D/src/fcvt.wu.d_b1-01.S", + "rv32i_m/D/src/fcvt.wu.d_b22-01.S", + "rv32i_m/D/src/fcvt.wu.d_b23-01.S", + "rv32i_m/D/src/fcvt.wu.d_b24-01.S", + "rv32i_m/D/src/fcvt.wu.d_b27-01.S", + "rv32i_m/D/src/fcvt.wu.d_b28-01.S", + "rv32i_m/D/src/fcvt.wu.d_b29-01.S", + "rv32i_m/D/src/fdiv.d_b1-01.S", + "rv32i_m/D/src/fdiv.d_b20-01.S", + "rv32i_m/D/src/fdiv.d_b2-01.S", + "rv32i_m/D/src/fdiv.d_b21-01.S", + "rv32i_m/D/src/fdiv.d_b3-01.S", + "rv32i_m/D/src/fdiv.d_b4-01.S", + "rv32i_m/D/src/fdiv.d_b5-01.S", + "rv32i_m/D/src/fdiv.d_b6-01.S", + "rv32i_m/D/src/fdiv.d_b7-01.S", + "rv32i_m/D/src/fdiv.d_b8-01.S", + "rv32i_m/D/src/fdiv.d_b9-01.S", + "rv32i_m/D/src/feq.d_b1-01.S", + "rv32i_m/D/src/feq.d_b19-01.S", + "rv32i_m/D/src/fle.d_b1-01.S", + "rv32i_m/D/src/fle.d_b19-01.S", + "rv32i_m/D/src/flt.d_b1-01.S", + "rv32i_m/D/src/flt.d_b19-01.S", + // "rv32i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back + // "rv32i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + "rv32i_m/D/src/fmadd.d_b14-01.S", + "rv32i_m/D/src/fmadd.d_b16-01.S", + "rv32i_m/D/src/fmadd.d_b17-01.S", + "rv32i_m/D/src/fmadd.d_b18-01.S", + "rv32i_m/D/src/fmadd.d_b2-01.S", + "rv32i_m/D/src/fmadd.d_b3-01.S", + "rv32i_m/D/src/fmadd.d_b4-01.S", + "rv32i_m/D/src/fmadd.d_b5-01.S", + "rv32i_m/D/src/fmadd.d_b6-01.S", + "rv32i_m/D/src/fmadd.d_b7-01.S", + "rv32i_m/D/src/fmadd.d_b8-01.S", + "rv32i_m/D/src/fmax.d_b1-01.S", + "rv32i_m/D/src/fmax.d_b19-01.S", + "rv32i_m/D/src/fmin.d_b1-01.S", + "rv32i_m/D/src/fmin.d_b19-01.S", + "rv32i_m/D/src/fmsub.d_b14-01.S", + "rv32i_m/D/src/fmsub.d_b16-01.S", + "rv32i_m/D/src/fmsub.d_b17-01.S", + "rv32i_m/D/src/fmsub.d_b18-01.S", + "rv32i_m/D/src/fmsub.d_b2-01.S", + "rv32i_m/D/src/fmsub.d_b3-01.S", + "rv32i_m/D/src/fmsub.d_b4-01.S", + "rv32i_m/D/src/fmsub.d_b5-01.S", + "rv32i_m/D/src/fmsub.d_b6-01.S", + "rv32i_m/D/src/fmsub.d_b7-01.S", + "rv32i_m/D/src/fmsub.d_b8-01.S", + "rv32i_m/D/src/fmul.d_b1-01.S", + "rv32i_m/D/src/fmul.d_b2-01.S", + "rv32i_m/D/src/fmul.d_b3-01.S", + "rv32i_m/D/src/fmul.d_b4-01.S", + "rv32i_m/D/src/fmul.d_b5-01.S", + "rv32i_m/D/src/fmul.d_b6-01.S", + "rv32i_m/D/src/fmul.d_b7-01.S", + "rv32i_m/D/src/fmul.d_b8-01.S", + "rv32i_m/D/src/fmul.d_b9-01.S", + "rv32i_m/D/src/fmv.d.x_b25-01.S", + "rv32i_m/D/src/fmv.d.x_b26-01.S", + "rv32i_m/D/src/fmv.x.d_b1-01.S", + "rv32i_m/D/src/fmv.x.d_b22-01.S", + "rv32i_m/D/src/fmv.x.d_b23-01.S", + "rv32i_m/D/src/fmv.x.d_b24-01.S", + "rv32i_m/D/src/fmv.x.d_b27-01.S", + "rv32i_m/D/src/fmv.x.d_b28-01.S", + "rv32i_m/D/src/fmv.x.d_b29-01.S", + "rv32i_m/D/src/fnmadd.d_b14-01.S", + "rv32i_m/D/src/fnmadd.d_b16-01.S", + "rv32i_m/D/src/fnmadd.d_b17-01.S", + "rv32i_m/D/src/fnmadd.d_b18-01.S", + "rv32i_m/D/src/fnmadd.d_b2-01.S", + "rv32i_m/D/src/fnmadd.d_b3-01.S", + "rv32i_m/D/src/fnmadd.d_b4-01.S", + "rv32i_m/D/src/fnmadd.d_b5-01.S", + "rv32i_m/D/src/fnmadd.d_b6-01.S", + "rv32i_m/D/src/fnmadd.d_b7-01.S", + "rv32i_m/D/src/fnmadd.d_b8-01.S", + "rv32i_m/D/src/fnmsub.d_b14-01.S", + "rv32i_m/D/src/fnmsub.d_b16-01.S", + "rv32i_m/D/src/fnmsub.d_b17-01.S", + "rv32i_m/D/src/fnmsub.d_b18-01.S", + "rv32i_m/D/src/fnmsub.d_b2-01.S", + "rv32i_m/D/src/fnmsub.d_b3-01.S", + "rv32i_m/D/src/fnmsub.d_b4-01.S", + "rv32i_m/D/src/fnmsub.d_b5-01.S", + "rv32i_m/D/src/fnmsub.d_b6-01.S", + "rv32i_m/D/src/fnmsub.d_b7-01.S", + "rv32i_m/D/src/fnmsub.d_b8-01.S", + "rv32i_m/D/src/fsgnj.d_b1-01.S", + "rv32i_m/D/src/fsgnjn.d_b1-01.S", + "rv32i_m/D/src/fsgnjx.d_b1-01.S", + // "rv32i_m/D/src/fsqrt.d_b1-01.S", + // "rv32i_m/D/src/fsqrt.d_b20-01.S", + // "rv32i_m/D/src/fsqrt.d_b2-01.S", + // "rv32i_m/D/src/fsqrt.d_b3-01.S", + // "rv32i_m/D/src/fsqrt.d_b4-01.S", + // "rv32i_m/D/src/fsqrt.d_b5-01.S", + // "rv32i_m/D/src/fsqrt.d_b7-01.S", + // "rv32i_m/D/src/fsqrt.d_b8-01.S", + // "rv32i_m/D/src/fsqrt.d_b9-01.S", + "rv32i_m/D/src/fssub.d_b10-01.S", + "rv32i_m/D/src/fssub.d_b1-01.S", + "rv32i_m/D/src/fssub.d_b11-01.S", + "rv32i_m/D/src/fssub.d_b12-01.S", + "rv32i_m/D/src/fssub.d_b13-01.S", + "rv32i_m/D/src/fssub.d_b2-01.S", + "rv32i_m/D/src/fssub.d_b3-01.S", + "rv32i_m/D/src/fssub.d_b4-01.S", + "rv32i_m/D/src/fssub.d_b5-01.S", + "rv32i_m/D/src/fssub.d_b7-01.S", + "rv32i_m/D/src/fssub.d_b8-01.S" }; string arch32priv[] = '{ @@ -1611,4 +1611,4 @@ string imperas32f[] = '{ string wally32d[] = '{ `WALLYTEST, "rv32i_m/D/src/WALLY-fld.S" - }; \ No newline at end of file + }; diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 81e36b037..9547f721c 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,8 +8,8 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) #XLEN ?= 64 -#all: root build_rv32e build_wally build_arch -all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 +#all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 +all: root fsd_fld_tempfix arch32 root: mkdir -p $(work_dir) @@ -37,12 +37,10 @@ arch64: wally32: riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" -# rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" wally64: riscof run --work-dir=$(work_dir) --config=config64.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run - rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv64i_m/ || echo "error suppressed" -# rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv64i_m/ || echo "error suppressed" + rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv64i_m/ || echo "error suppressed" wally32e: riscof run --work-dir=$(work_dir) --config=config32e.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run @@ -50,27 +48,6 @@ wally32e: rsync -a $(work_dir)/rv32e_unratified/ $(wally_workdir)/rv32e_unratified/ || echo "error suppressed" -build_arch: fsd_fld_tempfix - riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser -# rm -rf $(arch_workdir)/rv$(XLEN)i_m - rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" - rsync -a $(work_dir)/rv64i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" - -build_wally: - riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run -# riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt -# rm -rf $(wally_workdir)/rv$(XLEN)i_m -# mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/ - rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" - rsync -a $(work_dir)/rv64i_m/ $(wally_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" - -build_rv32e: - sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini - riscof run --work-dir=$(work_dir) --config=config32e.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run -# riscof --verbose debug run --work-dir=$(work_dir) --config=config32e.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt - rsync -a $(work_dir)/rv32i_m/ $(wally_workdir)/rv32i_m/ || echo "error suppressed" - rsync -a $(work_dir)/rv32e_unratified/ $(wally_workdir)/rv32e_unratified/ || echo "error suppressed" - memfile: find $(work) -type f -name "*.elf" | grep "rv64i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 64 --input "$$f" --output "$$f.memfile"; done find $(work) -type f -name "*.elf" | grep "rv32i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done @@ -82,4 +59,4 @@ clean: rm -f config32e.ini rm -rf $(work_dir) rm -rf $(wally_workdir) - rm -rf $(arch_workdir) \ No newline at end of file + rm -rf $(arch_workdir) From 5c54c5b521f8d64c9d20ac31d7657570ba4a6099 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 25 Jul 2022 23:29:05 +0000 Subject: [PATCH 109/138] Added rv32f tests to RV64gc --- pipelined/regression/regression-wally | 2 +- pipelined/testbench/testbench.sv | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 821246a5f..b3e9d3c8c 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -64,7 +64,7 @@ tc = TestCase( grepstr="400100000 instructions") configs.append(tc) -tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"] +tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch32f", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"] for test in tests64gc: tc = TestCase( name=test, diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 9bc1ce246..42095d124 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -89,6 +89,7 @@ logic [3:0] dummy; if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; else tests = {arch64c}; "arch64m": if (`M_SUPPORTED) tests = arch64m; + "arch32f": if (`F_SUPPORTED) tests = arch32f; // 32-bit FP tests run on rv64f "arch64d": if (`D_SUPPORTED) tests = arch64d; "imperas64i": tests = imperas64i; "imperas64f": if (`F_SUPPORTED) tests = imperas64f; From 2d7f4b133ccb281b269f70c75935d352dd5b3eb7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 26 Jul 2022 06:19:13 -0700 Subject: [PATCH 110/138] More work toward riscof tests --- pipelined/regression/regression-wally | 4 +- pipelined/testbench/testbench.sv | 3 +- pipelined/testbench/tests.vh | 602 +++++++++++++++------ tests/riscof/Makefile | 6 +- tests/riscof/sail_cSim/riscof_sail_cSim.py | 4 +- 5 files changed, 461 insertions(+), 158 deletions(-) diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 821246a5f..c86f19535 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -64,7 +64,7 @@ tc = TestCase( grepstr="400100000 instructions") configs.append(tc) -tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"] +tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64f", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"] for test in tests64gc: tc = TestCase( name=test, @@ -73,7 +73,7 @@ for test in tests64gc: grepstr="All tests ran without failures") configs.append(tc) -tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i", +tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "arch32d", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i", for test in tests32gc: tc = TestCase( name=test, diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 9bc1ce246..58163c57e 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -89,6 +89,7 @@ logic [3:0] dummy; if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; else tests = {arch64c}; "arch64m": if (`M_SUPPORTED) tests = arch64m; + "arch64f": if (`D_SUPPORTED) tests = arch64f; "arch64d": if (`D_SUPPORTED) tests = arch64d; "imperas64i": tests = imperas64i; "imperas64f": if (`F_SUPPORTED) tests = imperas64f; @@ -112,9 +113,9 @@ logic [3:0] dummy; else tests = {arch32c}; "arch32m": if (`M_SUPPORTED) tests = arch32m; "arch32f": if (`F_SUPPORTED) tests = arch32f; + "arch32d": if (`D_SUPPORTED) tests = arch32d; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - // "wally32d": if (`D_SUPPORTED) tests = wally32d; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; "imperas32c": if (`C_SUPPORTED) tests = imperas32c; diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 4154c6366..587733c39 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1052,35 +1052,186 @@ string imperas32f[] = '{ "rv64i_m/I/src/xori-01.S" }; + string arch64f[] = '{ + `RISCVARCHTEST, + "rv64i_m/F/src/fadd_b10-01.S", + "rv64i_m/F/src/fadd_b1-01.S", + "rv64i_m/F/src/fadd_b11-01.S", + "rv64i_m/F/src/fadd_b12-01.S", + "rv64i_m/F/src/fadd_b13-01.S", + "rv64i_m/F/src/fadd_b2-01.S", + "rv64i_m/F/src/fadd_b3-01.S", + "rv64i_m/F/src/fadd_b4-01.S", + "rv64i_m/F/src/fadd_b5-01.S", + "rv64i_m/F/src/fadd_b7-01.S", + "rv64i_m/F/src/fadd_b8-01.S", + "rv64i_m/F/src/fclass_b1-01.S", + "rv64i_m/F/src/fcvt.s.w_b25-01.S", + "rv64i_m/F/src/fcvt.s.w_b26-01.S", + "rv64i_m/F/src/fcvt.s.wu_b25-01.S", + "rv64i_m/F/src/fcvt.s.wu_b26-01.S", + "rv64i_m/F/src/fcvt.w.s_b1-01.S", + "rv64i_m/F/src/fcvt.w.s_b22-01.S", + "rv64i_m/F/src/fcvt.w.s_b23-01.S", + "rv64i_m/F/src/fcvt.w.s_b24-01.S", + "rv64i_m/F/src/fcvt.w.s_b27-01.S", + "rv64i_m/F/src/fcvt.w.s_b28-01.S", + "rv64i_m/F/src/fcvt.w.s_b29-01.S", + "rv64i_m/F/src/fcvt.wu.s_b1-01.S", + "rv64i_m/F/src/fcvt.wu.s_b22-01.S", + "rv64i_m/F/src/fcvt.wu.s_b23-01.S", + "rv64i_m/F/src/fcvt.wu.s_b24-01.S", + "rv64i_m/F/src/fcvt.wu.s_b27-01.S", + "rv64i_m/F/src/fcvt.wu.s_b28-01.S", + "rv64i_m/F/src/fcvt.wu.s_b29-01.S", + "rv64i_m/F/src/fdiv_b1-01.S", + "rv64i_m/F/src/fdiv_b20-01.S", + "rv64i_m/F/src/fdiv_b2-01.S", + "rv64i_m/F/src/fdiv_b21-01.S", + "rv64i_m/F/src/fdiv_b3-01.S", + "rv64i_m/F/src/fdiv_b4-01.S", + "rv64i_m/F/src/fdiv_b5-01.S", + "rv64i_m/F/src/fdiv_b6-01.S", + "rv64i_m/F/src/fdiv_b7-01.S", + "rv64i_m/F/src/fdiv_b8-01.S", + "rv64i_m/F/src/fdiv_b9-01.S", + "rv64i_m/F/src/feq_b1-01.S", + "rv64i_m/F/src/feq_b19-01.S", + "rv64i_m/F/src/fle_b1-01.S", + "rv64i_m/F/src/fle_b19-01.S", + "rv64i_m/F/src/flt_b1-01.S", + "rv64i_m/F/src/flt_b19-01.S", + // "rv64i_m/F/src/flw-align-01.S", + "rv64i_m/F/src/fmadd_b1-01.S", + "rv64i_m/F/src/fmadd_b14-01.S", + // "rv64i_m/F/src/fmadd_b15-01.S", + "rv64i_m/F/src/fmadd_b16-01.S", + "rv64i_m/F/src/fmadd_b17-01.S", + "rv64i_m/F/src/fmadd_b18-01.S", + "rv64i_m/F/src/fmadd_b2-01.S", + "rv64i_m/F/src/fmadd_b3-01.S", + "rv64i_m/F/src/fmadd_b4-01.S", + "rv64i_m/F/src/fmadd_b5-01.S", + "rv64i_m/F/src/fmadd_b6-01.S", + "rv64i_m/F/src/fmadd_b7-01.S", + "rv64i_m/F/src/fmadd_b8-01.S", + "rv64i_m/F/src/fmax_b1-01.S", + "rv64i_m/F/src/fmax_b19-01.S", + "rv64i_m/F/src/fmin_b1-01.S", + "rv64i_m/F/src/fmin_b19-01.S", + "rv64i_m/F/src/fmsub_b1-01.S", + "rv64i_m/F/src/fmsub_b14-01.S", + "rv64i_m/F/src/fmsub_b15-01.S", + "rv64i_m/F/src/fmsub_b16-01.S", + "rv64i_m/F/src/fmsub_b17-01.S", + "rv64i_m/F/src/fmsub_b18-01.S", + "rv64i_m/F/src/fmsub_b2-01.S", + "rv64i_m/F/src/fmsub_b3-01.S", + "rv64i_m/F/src/fmsub_b4-01.S", + "rv64i_m/F/src/fmsub_b5-01.S", + "rv64i_m/F/src/fmsub_b6-01.S", + "rv64i_m/F/src/fmsub_b7-01.S", + "rv64i_m/F/src/fmsub_b8-01.S", + "rv64i_m/F/src/fmul_b1-01.S", + "rv64i_m/F/src/fmul_b2-01.S", + "rv64i_m/F/src/fmul_b3-01.S", + "rv64i_m/F/src/fmul_b4-01.S", + "rv64i_m/F/src/fmul_b5-01.S", + "rv64i_m/F/src/fmul_b6-01.S", + "rv64i_m/F/src/fmul_b7-01.S", + "rv64i_m/F/src/fmul_b8-01.S", + "rv64i_m/F/src/fmul_b9-01.S", + "rv64i_m/F/src/fmv.w.x_b25-01.S", + "rv64i_m/F/src/fmv.w.x_b26-01.S", + "rv64i_m/F/src/fmv.x.w_b1-01.S", + "rv64i_m/F/src/fmv.x.w_b22-01.S", + "rv64i_m/F/src/fmv.x.w_b23-01.S", + "rv64i_m/F/src/fmv.x.w_b24-01.S", + "rv64i_m/F/src/fmv.x.w_b27-01.S", + "rv64i_m/F/src/fmv.x.w_b28-01.S", + "rv64i_m/F/src/fmv.x.w_b29-01.S", + "rv64i_m/F/src/fnmadd_b1-01.S", + "rv64i_m/F/src/fnmadd_b14-01.S", + // "rv64i_m/F/src/fnmadd_b15-01.S", + "rv64i_m/F/src/fnmadd_b16-01.S", + "rv64i_m/F/src/fnmadd_b17-01.S", + "rv64i_m/F/src/fnmadd_b18-01.S", + "rv64i_m/F/src/fnmadd_b2-01.S", + "rv64i_m/F/src/fnmadd_b3-01.S", + "rv64i_m/F/src/fnmadd_b4-01.S", + "rv64i_m/F/src/fnmadd_b5-01.S", + "rv64i_m/F/src/fnmadd_b6-01.S", + "rv64i_m/F/src/fnmadd_b7-01.S", + "rv64i_m/F/src/fnmadd_b8-01.S", + "rv64i_m/F/src/fnmsub_b1-01.S", + "rv64i_m/F/src/fnmsub_b14-01.S", + // "rv64i_m/F/src/fnmsub_b15-01.S", + "rv64i_m/F/src/fnmsub_b16-01.S", + "rv64i_m/F/src/fnmsub_b17-01.S", + "rv64i_m/F/src/fnmsub_b18-01.S", + "rv64i_m/F/src/fnmsub_b2-01.S", + "rv64i_m/F/src/fnmsub_b3-01.S", + "rv64i_m/F/src/fnmsub_b4-01.S", + "rv64i_m/F/src/fnmsub_b5-01.S", + "rv64i_m/F/src/fnmsub_b6-01.S", + "rv64i_m/F/src/fnmsub_b7-01.S", + "rv64i_m/F/src/fnmsub_b8-01.S", + "rv64i_m/F/src/fsgnj_b1-01.S", + "rv64i_m/F/src/fsgnjn_b1-01.S", + "rv64i_m/F/src/fsgnjx_b1-01.S", + // "rv64i_m/F/src/fsqrt_b1-01.S", + // "rv64i_m/F/src/fsqrt_b20-01.S", + // "rv64i_m/F/src/fsqrt_b2-01.S", + // "rv64i_m/F/src/fsqrt_b3-01.S", + // "rv64i_m/F/src/fsqrt_b4-01.S", + // "rv64i_m/F/src/fsqrt_b5-01.S", + // "rv64i_m/F/src/fsqrt_b7-01.S", + // "rv64i_m/F/src/fsqrt_b8-01.S", + // "rv64i_m/F/src/fsqrt_b9-01.S", + "rv64i_m/F/src/fsub_b10-01.S", + "rv64i_m/F/src/fsub_b1-01.S", + "rv64i_m/F/src/fsub_b11-01.S", + "rv64i_m/F/src/fsub_b12-01.S", + "rv64i_m/F/src/fsub_b13-01.S", + "rv64i_m/F/src/fsub_b2-01.S", + "rv64i_m/F/src/fsub_b3-01.S", + "rv64i_m/F/src/fsub_b4-01.S", + "rv64i_m/F/src/fsub_b5-01.S", + "rv64i_m/F/src/fsub_b7-01.S", + "rv64i_m/F/src/fsub_b8-01.S" + // "rv64i_m/F/src/fsw-align-01.S" + }; + + string arch64d[] = '{ `RISCVARCHTEST, - "rv32i_m/D/src/fadd.d_b10-01.S", - "rv32i_m/D/src/fadd.d_b1-01.S", - "rv32i_m/D/src/fadd.d_b11-01.S", - "rv32i_m/D/src/fadd.d_b12-01.S", - "rv32i_m/D/src/fadd.d_b13-01.S", - "rv32i_m/D/src/fadd.d_b2-01.S", - "rv32i_m/D/src/fadd.d_b3-01.S", - "rv32i_m/D/src/fadd.d_b4-01.S", - "rv32i_m/D/src/fadd.d_b5-01.S", - "rv32i_m/D/src/fadd.d_b7-01.S", - "rv32i_m/D/src/fadd.d_b8-01.S", - "rv32i_m/D/src/fclass.d_b1-01.S", + "rv64i_m/D/src/fadd.d_b10-01.S", + "rv64i_m/D/src/fadd.d_b1-01.S", + "rv64i_m/D/src/fadd.d_b11-01.S", + "rv64i_m/D/src/fadd.d_b12-01.S", + "rv64i_m/D/src/fadd.d_b13-01.S", + "rv64i_m/D/src/fadd.d_b2-01.S", + "rv64i_m/D/src/fadd.d_b3-01.S", + "rv64i_m/D/src/fadd.d_b4-01.S", + "rv64i_m/D/src/fadd.d_b5-01.S", + "rv64i_m/D/src/fadd.d_b7-01.S", + "rv64i_m/D/src/fadd.d_b8-01.S", + "rv64i_m/D/src/fclass.d_b1-01.S", "rv64i_m/D/src/fcvt.d.l_b25-01.S", "rv64i_m/D/src/fcvt.d.l_b26-01.S", "rv64i_m/D/src/fcvt.d.lu_b25-01.S", "rv64i_m/D/src/fcvt.d.lu_b26-01.S", - "rv32i_m/D/src/fcvt.d.s_b1-01.S", - "rv32i_m/D/src/fcvt.d.s_b22-01.S", - "rv32i_m/D/src/fcvt.d.s_b23-01.S", - "rv32i_m/D/src/fcvt.d.s_b24-01.S", - "rv32i_m/D/src/fcvt.d.s_b27-01.S", - "rv32i_m/D/src/fcvt.d.s_b28-01.S", - "rv32i_m/D/src/fcvt.d.s_b29-01.S", - "rv32i_m/D/src/fcvt.d.w_b25-01.S", - "rv32i_m/D/src/fcvt.d.w_b26-01.S", - "rv32i_m/D/src/fcvt.d.wu_b25-01.S", - "rv32i_m/D/src/fcvt.d.wu_b26-01.S", + "rv64i_m/D/src/fcvt.d.s_b1-01.S", + "rv64i_m/D/src/fcvt.d.s_b22-01.S", + "rv64i_m/D/src/fcvt.d.s_b23-01.S", + "rv64i_m/D/src/fcvt.d.s_b24-01.S", + "rv64i_m/D/src/fcvt.d.s_b27-01.S", + "rv64i_m/D/src/fcvt.d.s_b28-01.S", + "rv64i_m/D/src/fcvt.d.s_b29-01.S", + "rv64i_m/D/src/fcvt.d.w_b25-01.S", + "rv64i_m/D/src/fcvt.d.w_b26-01.S", + "rv64i_m/D/src/fcvt.d.wu_b25-01.S", + "rv64i_m/D/src/fcvt.d.wu_b26-01.S", "rv64i_m/D/src/fcvt.l.d_b1-01.S", "rv64i_m/D/src/fcvt.l.d_b22-01.S", "rv64i_m/D/src/fcvt.l.d_b23-01.S", @@ -1095,135 +1246,135 @@ string imperas32f[] = '{ "rv64i_m/D/src/fcvt.lu.d_b27-01.S", "rv64i_m/D/src/fcvt.lu.d_b28-01.S", "rv64i_m/D/src/fcvt.lu.d_b29-01.S", - "rv32i_m/D/src/fcvt.s.d_b1-01.S", - "rv32i_m/D/src/fcvt.s.d_b22-01.S", - "rv32i_m/D/src/fcvt.s.d_b23-01.S", - "rv32i_m/D/src/fcvt.s.d_b24-01.S", - "rv32i_m/D/src/fcvt.s.d_b27-01.S", - "rv32i_m/D/src/fcvt.s.d_b28-01.S", - "rv32i_m/D/src/fcvt.s.d_b29-01.S", - "rv32i_m/D/src/fcvt.w.d_b1-01.S", - "rv32i_m/D/src/fcvt.w.d_b22-01.S", - "rv32i_m/D/src/fcvt.w.d_b23-01.S", - "rv32i_m/D/src/fcvt.w.d_b24-01.S", - "rv32i_m/D/src/fcvt.w.d_b27-01.S", - "rv32i_m/D/src/fcvt.w.d_b28-01.S", - "rv32i_m/D/src/fcvt.w.d_b29-01.S", - "rv32i_m/D/src/fcvt.wu.d_b1-01.S", - "rv32i_m/D/src/fcvt.wu.d_b22-01.S", - "rv32i_m/D/src/fcvt.wu.d_b23-01.S", - "rv32i_m/D/src/fcvt.wu.d_b24-01.S", - "rv32i_m/D/src/fcvt.wu.d_b27-01.S", - "rv32i_m/D/src/fcvt.wu.d_b28-01.S", - "rv32i_m/D/src/fcvt.wu.d_b29-01.S", - "rv32i_m/D/src/fdiv.d_b1-01.S", - "rv32i_m/D/src/fdiv.d_b20-01.S", - "rv32i_m/D/src/fdiv.d_b2-01.S", - "rv32i_m/D/src/fdiv.d_b21-01.S", - "rv32i_m/D/src/fdiv.d_b3-01.S", - "rv32i_m/D/src/fdiv.d_b4-01.S", - "rv32i_m/D/src/fdiv.d_b5-01.S", - "rv32i_m/D/src/fdiv.d_b6-01.S", - "rv32i_m/D/src/fdiv.d_b7-01.S", - "rv32i_m/D/src/fdiv.d_b8-01.S", - "rv32i_m/D/src/fdiv.d_b9-01.S", - "rv32i_m/D/src/feq.d_b1-01.S", - "rv32i_m/D/src/feq.d_b19-01.S", - "rv32i_m/D/src/fle.d_b1-01.S", - "rv32i_m/D/src/fle.d_b19-01.S", - "rv32i_m/D/src/flt.d_b1-01.S", - "rv32i_m/D/src/flt.d_b19-01.S", - // "rv32i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back - // "rv32i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 - "rv32i_m/D/src/fmadd.d_b14-01.S", - "rv32i_m/D/src/fmadd.d_b16-01.S", - "rv32i_m/D/src/fmadd.d_b17-01.S", - "rv32i_m/D/src/fmadd.d_b18-01.S", - "rv32i_m/D/src/fmadd.d_b2-01.S", - "rv32i_m/D/src/fmadd.d_b3-01.S", - "rv32i_m/D/src/fmadd.d_b4-01.S", - "rv32i_m/D/src/fmadd.d_b5-01.S", - "rv32i_m/D/src/fmadd.d_b6-01.S", - "rv32i_m/D/src/fmadd.d_b7-01.S", - "rv32i_m/D/src/fmadd.d_b8-01.S", - "rv32i_m/D/src/fmax.d_b1-01.S", - "rv32i_m/D/src/fmax.d_b19-01.S", - "rv32i_m/D/src/fmin.d_b1-01.S", - "rv32i_m/D/src/fmin.d_b19-01.S", - "rv32i_m/D/src/fmsub.d_b14-01.S", - "rv32i_m/D/src/fmsub.d_b16-01.S", - "rv32i_m/D/src/fmsub.d_b17-01.S", - "rv32i_m/D/src/fmsub.d_b18-01.S", - "rv32i_m/D/src/fmsub.d_b2-01.S", - "rv32i_m/D/src/fmsub.d_b3-01.S", - "rv32i_m/D/src/fmsub.d_b4-01.S", - "rv32i_m/D/src/fmsub.d_b5-01.S", - "rv32i_m/D/src/fmsub.d_b6-01.S", - "rv32i_m/D/src/fmsub.d_b7-01.S", - "rv32i_m/D/src/fmsub.d_b8-01.S", - "rv32i_m/D/src/fmul.d_b1-01.S", - "rv32i_m/D/src/fmul.d_b2-01.S", - "rv32i_m/D/src/fmul.d_b3-01.S", - "rv32i_m/D/src/fmul.d_b4-01.S", - "rv32i_m/D/src/fmul.d_b5-01.S", - "rv32i_m/D/src/fmul.d_b6-01.S", - "rv32i_m/D/src/fmul.d_b7-01.S", - "rv32i_m/D/src/fmul.d_b8-01.S", - "rv32i_m/D/src/fmul.d_b9-01.S", - "rv32i_m/D/src/fmv.d.x_b25-01.S", - "rv32i_m/D/src/fmv.d.x_b26-01.S", - "rv32i_m/D/src/fmv.x.d_b1-01.S", - "rv32i_m/D/src/fmv.x.d_b22-01.S", - "rv32i_m/D/src/fmv.x.d_b23-01.S", - "rv32i_m/D/src/fmv.x.d_b24-01.S", - "rv32i_m/D/src/fmv.x.d_b27-01.S", - "rv32i_m/D/src/fmv.x.d_b28-01.S", - "rv32i_m/D/src/fmv.x.d_b29-01.S", - "rv32i_m/D/src/fnmadd.d_b14-01.S", - "rv32i_m/D/src/fnmadd.d_b16-01.S", - "rv32i_m/D/src/fnmadd.d_b17-01.S", - "rv32i_m/D/src/fnmadd.d_b18-01.S", - "rv32i_m/D/src/fnmadd.d_b2-01.S", - "rv32i_m/D/src/fnmadd.d_b3-01.S", - "rv32i_m/D/src/fnmadd.d_b4-01.S", - "rv32i_m/D/src/fnmadd.d_b5-01.S", - "rv32i_m/D/src/fnmadd.d_b6-01.S", - "rv32i_m/D/src/fnmadd.d_b7-01.S", - "rv32i_m/D/src/fnmadd.d_b8-01.S", - "rv32i_m/D/src/fnmsub.d_b14-01.S", - "rv32i_m/D/src/fnmsub.d_b16-01.S", - "rv32i_m/D/src/fnmsub.d_b17-01.S", - "rv32i_m/D/src/fnmsub.d_b18-01.S", - "rv32i_m/D/src/fnmsub.d_b2-01.S", - "rv32i_m/D/src/fnmsub.d_b3-01.S", - "rv32i_m/D/src/fnmsub.d_b4-01.S", - "rv32i_m/D/src/fnmsub.d_b5-01.S", - "rv32i_m/D/src/fnmsub.d_b6-01.S", - "rv32i_m/D/src/fnmsub.d_b7-01.S", - "rv32i_m/D/src/fnmsub.d_b8-01.S", - "rv32i_m/D/src/fsgnj.d_b1-01.S", - "rv32i_m/D/src/fsgnjn.d_b1-01.S", - "rv32i_m/D/src/fsgnjx.d_b1-01.S", - // "rv32i_m/D/src/fsqrt.d_b1-01.S", - // "rv32i_m/D/src/fsqrt.d_b20-01.S", - // "rv32i_m/D/src/fsqrt.d_b2-01.S", - // "rv32i_m/D/src/fsqrt.d_b3-01.S", - // "rv32i_m/D/src/fsqrt.d_b4-01.S", - // "rv32i_m/D/src/fsqrt.d_b5-01.S", - // "rv32i_m/D/src/fsqrt.d_b7-01.S", - // "rv32i_m/D/src/fsqrt.d_b8-01.S", - // "rv32i_m/D/src/fsqrt.d_b9-01.S", - "rv32i_m/D/src/fssub.d_b10-01.S", - "rv32i_m/D/src/fssub.d_b1-01.S", - "rv32i_m/D/src/fssub.d_b11-01.S", - "rv32i_m/D/src/fssub.d_b12-01.S", - "rv32i_m/D/src/fssub.d_b13-01.S", - "rv32i_m/D/src/fssub.d_b2-01.S", - "rv32i_m/D/src/fssub.d_b3-01.S", - "rv32i_m/D/src/fssub.d_b4-01.S", - "rv32i_m/D/src/fssub.d_b5-01.S", - "rv32i_m/D/src/fssub.d_b7-01.S", - "rv32i_m/D/src/fssub.d_b8-01.S" + "rv64i_m/D/src/fcvt.s.d_b1-01.S", + "rv64i_m/D/src/fcvt.s.d_b22-01.S", + "rv64i_m/D/src/fcvt.s.d_b23-01.S", + "rv64i_m/D/src/fcvt.s.d_b24-01.S", + "rv64i_m/D/src/fcvt.s.d_b27-01.S", + "rv64i_m/D/src/fcvt.s.d_b28-01.S", + "rv64i_m/D/src/fcvt.s.d_b29-01.S", + "rv64i_m/D/src/fcvt.w.d_b1-01.S", + "rv64i_m/D/src/fcvt.w.d_b22-01.S", + "rv64i_m/D/src/fcvt.w.d_b23-01.S", + "rv64i_m/D/src/fcvt.w.d_b24-01.S", + "rv64i_m/D/src/fcvt.w.d_b27-01.S", + "rv64i_m/D/src/fcvt.w.d_b28-01.S", + "rv64i_m/D/src/fcvt.w.d_b29-01.S", + "rv64i_m/D/src/fcvt.wu.d_b1-01.S", + "rv64i_m/D/src/fcvt.wu.d_b22-01.S", + "rv64i_m/D/src/fcvt.wu.d_b23-01.S", + "rv64i_m/D/src/fcvt.wu.d_b24-01.S", + "rv64i_m/D/src/fcvt.wu.d_b27-01.S", + "rv64i_m/D/src/fcvt.wu.d_b28-01.S", + "rv64i_m/D/src/fcvt.wu.d_b29-01.S", + "rv64i_m/D/src/fdiv.d_b1-01.S", + "rv64i_m/D/src/fdiv.d_b20-01.S", + "rv64i_m/D/src/fdiv.d_b2-01.S", + "rv64i_m/D/src/fdiv.d_b21-01.S", + "rv64i_m/D/src/fdiv.d_b3-01.S", + "rv64i_m/D/src/fdiv.d_b4-01.S", + "rv64i_m/D/src/fdiv.d_b5-01.S", + "rv64i_m/D/src/fdiv.d_b6-01.S", + "rv64i_m/D/src/fdiv.d_b7-01.S", + "rv64i_m/D/src/fdiv.d_b8-01.S", + "rv64i_m/D/src/fdiv.d_b9-01.S", + "rv64i_m/D/src/feq.d_b1-01.S", + "rv64i_m/D/src/feq.d_b19-01.S", + "rv64i_m/D/src/fle.d_b1-01.S", + "rv64i_m/D/src/fle.d_b19-01.S", + "rv64i_m/D/src/flt.d_b1-01.S", + "rv64i_m/D/src/flt.d_b19-01.S", + // "rv64i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back + // "rv64i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + "rv64i_m/D/src/fmadd.d_b14-01.S", + "rv64i_m/D/src/fmadd.d_b16-01.S", + "rv64i_m/D/src/fmadd.d_b17-01.S", + "rv64i_m/D/src/fmadd.d_b18-01.S", + "rv64i_m/D/src/fmadd.d_b2-01.S", + "rv64i_m/D/src/fmadd.d_b3-01.S", + "rv64i_m/D/src/fmadd.d_b4-01.S", + "rv64i_m/D/src/fmadd.d_b5-01.S", + "rv64i_m/D/src/fmadd.d_b6-01.S", + "rv64i_m/D/src/fmadd.d_b7-01.S", + "rv64i_m/D/src/fmadd.d_b8-01.S", + "rv64i_m/D/src/fmax.d_b1-01.S", + "rv64i_m/D/src/fmax.d_b19-01.S", + "rv64i_m/D/src/fmin.d_b1-01.S", + "rv64i_m/D/src/fmin.d_b19-01.S", + "rv64i_m/D/src/fmsub.d_b14-01.S", + "rv64i_m/D/src/fmsub.d_b16-01.S", + "rv64i_m/D/src/fmsub.d_b17-01.S", + "rv64i_m/D/src/fmsub.d_b18-01.S", + "rv64i_m/D/src/fmsub.d_b2-01.S", + "rv64i_m/D/src/fmsub.d_b3-01.S", + "rv64i_m/D/src/fmsub.d_b4-01.S", + "rv64i_m/D/src/fmsub.d_b5-01.S", + "rv64i_m/D/src/fmsub.d_b6-01.S", + "rv64i_m/D/src/fmsub.d_b7-01.S", + "rv64i_m/D/src/fmsub.d_b8-01.S", + "rv64i_m/D/src/fmul.d_b1-01.S", + "rv64i_m/D/src/fmul.d_b2-01.S", + "rv64i_m/D/src/fmul.d_b3-01.S", + "rv64i_m/D/src/fmul.d_b4-01.S", + "rv64i_m/D/src/fmul.d_b5-01.S", + "rv64i_m/D/src/fmul.d_b6-01.S", + "rv64i_m/D/src/fmul.d_b7-01.S", + "rv64i_m/D/src/fmul.d_b8-01.S", + "rv64i_m/D/src/fmul.d_b9-01.S", + "rv64i_m/D/src/fmv.d.x_b25-01.S", + "rv64i_m/D/src/fmv.d.x_b26-01.S", + "rv64i_m/D/src/fmv.x.d_b1-01.S", + "rv64i_m/D/src/fmv.x.d_b22-01.S", + "rv64i_m/D/src/fmv.x.d_b23-01.S", + "rv64i_m/D/src/fmv.x.d_b24-01.S", + "rv64i_m/D/src/fmv.x.d_b27-01.S", + "rv64i_m/D/src/fmv.x.d_b28-01.S", + "rv64i_m/D/src/fmv.x.d_b29-01.S", + "rv64i_m/D/src/fnmadd.d_b14-01.S", + "rv64i_m/D/src/fnmadd.d_b16-01.S", + "rv64i_m/D/src/fnmadd.d_b17-01.S", + "rv64i_m/D/src/fnmadd.d_b18-01.S", + "rv64i_m/D/src/fnmadd.d_b2-01.S", + "rv64i_m/D/src/fnmadd.d_b3-01.S", + "rv64i_m/D/src/fnmadd.d_b4-01.S", + "rv64i_m/D/src/fnmadd.d_b5-01.S", + "rv64i_m/D/src/fnmadd.d_b6-01.S", + "rv64i_m/D/src/fnmadd.d_b7-01.S", + "rv64i_m/D/src/fnmadd.d_b8-01.S", + "rv64i_m/D/src/fnmsub.d_b14-01.S", + "rv64i_m/D/src/fnmsub.d_b16-01.S", + "rv64i_m/D/src/fnmsub.d_b17-01.S", + "rv64i_m/D/src/fnmsub.d_b18-01.S", + "rv64i_m/D/src/fnmsub.d_b2-01.S", + "rv64i_m/D/src/fnmsub.d_b3-01.S", + "rv64i_m/D/src/fnmsub.d_b4-01.S", + "rv64i_m/D/src/fnmsub.d_b5-01.S", + "rv64i_m/D/src/fnmsub.d_b6-01.S", + "rv64i_m/D/src/fnmsub.d_b7-01.S", + "rv64i_m/D/src/fnmsub.d_b8-01.S", + "rv64i_m/D/src/fsgnj.d_b1-01.S", + "rv64i_m/D/src/fsgnjn.d_b1-01.S", + "rv64i_m/D/src/fsgnjx.d_b1-01.S", + // "rv64i_m/D/src/fsqrt.d_b1-01.S", + // "rv64i_m/D/src/fsqrt.d_b20-01.S", + // "rv64i_m/D/src/fsqrt.d_b2-01.S", + // "rv64i_m/D/src/fsqrt.d_b3-01.S", + // "rv64i_m/D/src/fsqrt.d_b4-01.S", + // "rv64i_m/D/src/fsqrt.d_b5-01.S", + // "rv64i_m/D/src/fsqrt.d_b7-01.S", + // "rv64i_m/D/src/fsqrt.d_b8-01.S", + // "rv64i_m/D/src/fsqrt.d_b9-01.S", + "rv64i_m/D/src/fssub.d_b10-01.S", + "rv64i_m/D/src/fssub.d_b1-01.S", + "rv64i_m/D/src/fssub.d_b11-01.S", + "rv64i_m/D/src/fssub.d_b12-01.S", + "rv64i_m/D/src/fssub.d_b13-01.S", + "rv64i_m/D/src/fssub.d_b2-01.S", + "rv64i_m/D/src/fssub.d_b3-01.S", + "rv64i_m/D/src/fssub.d_b4-01.S", + "rv64i_m/D/src/fssub.d_b5-01.S", + "rv64i_m/D/src/fssub.d_b7-01.S", + "rv64i_m/D/src/fssub.d_b8-01.S" }; string arch32priv[] = '{ @@ -1408,6 +1559,153 @@ string imperas32f[] = '{ // "rv32i_m/F/src/fsw-align-01.S" }; + string arch32d[] = '{ + `RISCVARCHTEST, + "rv32i_m/D/src/fadd.d_b10-01.S", + "rv32i_m/D/src/fadd.d_b1-01.S", + "rv32i_m/D/src/fadd.d_b11-01.S", + "rv32i_m/D/src/fadd.d_b12-01.S", + "rv32i_m/D/src/fadd.d_b13-01.S", + "rv32i_m/D/src/fadd.d_b2-01.S", + "rv32i_m/D/src/fadd.d_b3-01.S", + "rv32i_m/D/src/fadd.d_b4-01.S", + "rv32i_m/D/src/fadd.d_b5-01.S", + "rv32i_m/D/src/fadd.d_b7-01.S", + "rv32i_m/D/src/fadd.d_b8-01.S", + "rv32i_m/D/src/fclass.d_b1-01.S", + "rv32i_m/D/src/fcvt.d.s_b1-01.S", + "rv32i_m/D/src/fcvt.d.s_b22-01.S", + "rv32i_m/D/src/fcvt.d.s_b23-01.S", + "rv32i_m/D/src/fcvt.d.s_b24-01.S", + "rv32i_m/D/src/fcvt.d.s_b27-01.S", + "rv32i_m/D/src/fcvt.d.s_b28-01.S", + "rv32i_m/D/src/fcvt.d.s_b29-01.S", + "rv32i_m/D/src/fcvt.d.w_b25-01.S", + "rv32i_m/D/src/fcvt.d.w_b26-01.S", + "rv32i_m/D/src/fcvt.d.wu_b25-01.S", + "rv32i_m/D/src/fcvt.d.wu_b26-01.S", + "rv32i_m/D/src/fcvt.s.d_b1-01.S", + "rv32i_m/D/src/fcvt.s.d_b22-01.S", + "rv32i_m/D/src/fcvt.s.d_b23-01.S", + "rv32i_m/D/src/fcvt.s.d_b24-01.S", + "rv32i_m/D/src/fcvt.s.d_b27-01.S", + "rv32i_m/D/src/fcvt.s.d_b28-01.S", + "rv32i_m/D/src/fcvt.s.d_b29-01.S", + "rv32i_m/D/src/fcvt.w.d_b1-01.S", + "rv32i_m/D/src/fcvt.w.d_b22-01.S", + "rv32i_m/D/src/fcvt.w.d_b23-01.S", + "rv32i_m/D/src/fcvt.w.d_b24-01.S", + "rv32i_m/D/src/fcvt.w.d_b27-01.S", + "rv32i_m/D/src/fcvt.w.d_b28-01.S", + "rv32i_m/D/src/fcvt.w.d_b29-01.S", + "rv32i_m/D/src/fcvt.wu.d_b1-01.S", + "rv32i_m/D/src/fcvt.wu.d_b22-01.S", + "rv32i_m/D/src/fcvt.wu.d_b23-01.S", + "rv32i_m/D/src/fcvt.wu.d_b24-01.S", + "rv32i_m/D/src/fcvt.wu.d_b27-01.S", + "rv32i_m/D/src/fcvt.wu.d_b28-01.S", + "rv32i_m/D/src/fcvt.wu.d_b29-01.S", + "rv32i_m/D/src/fdiv.d_b1-01.S", + "rv32i_m/D/src/fdiv.d_b20-01.S", + "rv32i_m/D/src/fdiv.d_b2-01.S", + "rv32i_m/D/src/fdiv.d_b21-01.S", + "rv32i_m/D/src/fdiv.d_b3-01.S", + "rv32i_m/D/src/fdiv.d_b4-01.S", + "rv32i_m/D/src/fdiv.d_b5-01.S", + "rv32i_m/D/src/fdiv.d_b6-01.S", + "rv32i_m/D/src/fdiv.d_b7-01.S", + "rv32i_m/D/src/fdiv.d_b8-01.S", + "rv32i_m/D/src/fdiv.d_b9-01.S", + "rv32i_m/D/src/feq.d_b1-01.S", + "rv32i_m/D/src/feq.d_b19-01.S", + "rv32i_m/D/src/fle.d_b1-01.S", + "rv32i_m/D/src/fle.d_b19-01.S", + "rv32i_m/D/src/flt.d_b1-01.S", + "rv32i_m/D/src/flt.d_b19-01.S", + // "rv32i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back + // "rv32i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + "rv32i_m/D/src/fmadd.d_b14-01.S", + "rv32i_m/D/src/fmadd.d_b16-01.S", + "rv32i_m/D/src/fmadd.d_b17-01.S", + "rv32i_m/D/src/fmadd.d_b18-01.S", + "rv32i_m/D/src/fmadd.d_b2-01.S", + "rv32i_m/D/src/fmadd.d_b3-01.S", + "rv32i_m/D/src/fmadd.d_b4-01.S", + "rv32i_m/D/src/fmadd.d_b5-01.S", + "rv32i_m/D/src/fmadd.d_b6-01.S", + "rv32i_m/D/src/fmadd.d_b7-01.S", + "rv32i_m/D/src/fmadd.d_b8-01.S", + "rv32i_m/D/src/fmax.d_b1-01.S", + "rv32i_m/D/src/fmax.d_b19-01.S", + "rv32i_m/D/src/fmin.d_b1-01.S", + "rv32i_m/D/src/fmin.d_b19-01.S", + "rv32i_m/D/src/fmsub.d_b14-01.S", + "rv32i_m/D/src/fmsub.d_b16-01.S", + "rv32i_m/D/src/fmsub.d_b17-01.S", + "rv32i_m/D/src/fmsub.d_b18-01.S", + "rv32i_m/D/src/fmsub.d_b2-01.S", + "rv32i_m/D/src/fmsub.d_b3-01.S", + "rv32i_m/D/src/fmsub.d_b4-01.S", + "rv32i_m/D/src/fmsub.d_b5-01.S", + "rv32i_m/D/src/fmsub.d_b6-01.S", + "rv32i_m/D/src/fmsub.d_b7-01.S", + "rv32i_m/D/src/fmsub.d_b8-01.S", + "rv32i_m/D/src/fmul.d_b1-01.S", + "rv32i_m/D/src/fmul.d_b2-01.S", + "rv32i_m/D/src/fmul.d_b3-01.S", + "rv32i_m/D/src/fmul.d_b4-01.S", + "rv32i_m/D/src/fmul.d_b5-01.S", + "rv32i_m/D/src/fmul.d_b6-01.S", + "rv32i_m/D/src/fmul.d_b7-01.S", + "rv32i_m/D/src/fmul.d_b8-01.S", + "rv32i_m/D/src/fmul.d_b9-01.S", + "rv32i_m/D/src/fnmadd.d_b14-01.S", + "rv32i_m/D/src/fnmadd.d_b16-01.S", + "rv32i_m/D/src/fnmadd.d_b17-01.S", + "rv32i_m/D/src/fnmadd.d_b18-01.S", + "rv32i_m/D/src/fnmadd.d_b2-01.S", + "rv32i_m/D/src/fnmadd.d_b3-01.S", + "rv32i_m/D/src/fnmadd.d_b4-01.S", + "rv32i_m/D/src/fnmadd.d_b5-01.S", + "rv32i_m/D/src/fnmadd.d_b6-01.S", + "rv32i_m/D/src/fnmadd.d_b7-01.S", + "rv32i_m/D/src/fnmadd.d_b8-01.S", + "rv32i_m/D/src/fnmsub.d_b14-01.S", + "rv32i_m/D/src/fnmsub.d_b16-01.S", + "rv32i_m/D/src/fnmsub.d_b17-01.S", + "rv32i_m/D/src/fnmsub.d_b18-01.S", + "rv32i_m/D/src/fnmsub.d_b2-01.S", + "rv32i_m/D/src/fnmsub.d_b3-01.S", + "rv32i_m/D/src/fnmsub.d_b4-01.S", + "rv32i_m/D/src/fnmsub.d_b5-01.S", + "rv32i_m/D/src/fnmsub.d_b6-01.S", + "rv32i_m/D/src/fnmsub.d_b7-01.S", + "rv32i_m/D/src/fnmsub.d_b8-01.S", + "rv32i_m/D/src/fsgnj.d_b1-01.S", + "rv32i_m/D/src/fsgnjn.d_b1-01.S", + "rv32i_m/D/src/fsgnjx.d_b1-01.S", + // "rv32i_m/D/src/fsqrt.d_b1-01.S", + // "rv32i_m/D/src/fsqrt.d_b20-01.S", + // "rv32i_m/D/src/fsqrt.d_b2-01.S", + // "rv32i_m/D/src/fsqrt.d_b3-01.S", + // "rv32i_m/D/src/fsqrt.d_b4-01.S", + // "rv32i_m/D/src/fsqrt.d_b5-01.S", + // "rv32i_m/D/src/fsqrt.d_b7-01.S", + // "rv32i_m/D/src/fsqrt.d_b8-01.S", + // "rv32i_m/D/src/fsqrt.d_b9-01.S", + "rv32i_m/D/src/fssub.d_b10-01.S", + "rv32i_m/D/src/fssub.d_b1-01.S", + "rv32i_m/D/src/fssub.d_b11-01.S", + "rv32i_m/D/src/fssub.d_b12-01.S", + "rv32i_m/D/src/fssub.d_b13-01.S", + "rv32i_m/D/src/fssub.d_b2-01.S", + "rv32i_m/D/src/fssub.d_b3-01.S", + "rv32i_m/D/src/fssub.d_b4-01.S", + "rv32i_m/D/src/fssub.d_b5-01.S", + "rv32i_m/D/src/fssub.d_b7-01.S", + "rv32i_m/D/src/fssub.d_b8-01.S" +}; + string arch32c[] = '{ `RISCVARCHTEST, diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 9547f721c..c71ce4473 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,8 +8,8 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) #XLEN ?= 64 -#all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 -all: root fsd_fld_tempfix arch32 +all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 +#all: root fsd_fld_tempfix arch64 root: mkdir -p $(work_dir) @@ -33,6 +33,8 @@ arch32: arch64: riscof run --work-dir=$(work_dir) --config=config64.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser rsync -a $(work_dir)/rv64i_m/ $(arch_workdir)/rv64i_m/ || echo "error suppressed" +# Also copy F and D tests to RV64 + rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv64i_m/ || echo "error suppressed" wally32: riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py index 9831e516b..9a548b7ad 100644 --- a/tests/riscof/sail_cSim/riscof_sail_cSim.py +++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py @@ -127,4 +127,6 @@ class sail_cSim(pluginTemplate): execute+=coverage_cmd make.add_target(execute) - make.execute_all(self.work_dir) +# make.execute_all(self.work_dir) +# DH 7/26/22 increase timeout so sim will finish on slow machines + make.execute_all(self.work_dir, timeout = 600) From 5218865a7f0cf87cbd8fb180c290b020b9b4a78d Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 26 Jul 2022 09:14:40 -0700 Subject: [PATCH 111/138] Committing changes made to UART test --- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 1 + .../rv32i_m/privilege/src/WALLY-uart-01.S | 77 ++++++++++++++++--- 2 files changed, 68 insertions(+), 10 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index e69b54f9d..db123f078 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -1061,6 +1061,7 @@ uart_lsr_intr_loop: lb t3, 0(t2) andi t3, t3, 0x7 bne t3, t4, uart_lsr_intr_loop +uart_save_iir_status: sw t3, 0(t1) addi t1, t1, 4 addi a6, a6, 4 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S index cf430cb48..603542fde 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -119,16 +119,16 @@ test_cases: # =========== Transmit-related interrupts =========== -.4byte UART_IER, 0x07, write08_test # enable data available, buffer empty, and line status interrupts -.4byte UART_IIR, 0x02, read08_test # buffer should be empty, causing interrupt -.4byte UART_THR, 0x00, write08_test # write zeroes to transmitter -.4byte 0x0, 0x0401, uart_data_wait # IIR should have data ready interrupt -.4byte UART_THR, 0x01, write08_test # write 1 to transmitter buffer -.4byte UART_IIR, 0x04, read08_test # data interrupt should still be high -.4byte 0x0, 0x06, uart_lsr_intr_wait # wait for transmission to complete, IIR should throw error due to overrun error. -.4byte UART_LSR, 0x63, read08_test # read overrun error from LSR -.4byte UART_IIR, 0x04, read08_test # check that LSR interrupt was cleared -.4byte UART_RBR, 0x01, read08_test # read previous value from UART +.4byte UART_IER, 0x07, write08_test # enable data available, buffer empty, and line status interrupts +.4byte UART_IIR, 0x02, read08_test # buffer should be empty, causing interrupt +.4byte UART_THR, 0x00, write08_test # write zeroes to transmitter +.4byte 0x0, 0x0401, uart_data_wait # IIR should have data ready interrupt +.4byte UART_THR, 0x01, write08_test # write 1 to transmitter buffer +.4byte UART_IIR, 0x04, read08_test # data interrupt should still be high +.4byte 0x0, 0x06, uart_lsr_intr_wait # wait for transmission to complete, IIR should throw error due to overrun error. +.4byte UART_LSR, 0x63, read08_test # read overrun error from LSR +.4byte UART_IIR, 0x04, read08_test # check that LSR interrupt was cleared +.4byte UART_RBR, 0x01, read08_test # read previous value from UART # =========== MODEM interrupts =========== @@ -183,5 +183,62 @@ test_cases: .4byte UART_FCR, 0xC1, read08_test # Check that FCR clears bits 1 and 2 when written to 1 .4byte UART_LSR, 0x60, read08_test # No data ready, FIFO cleared by writing to FCR +# =========== FIFO receiver/overrun =========== + +.4byte UART_FCR, 0x01, write08_test # Set FIFO trigger threshold to 1 and enable FIFO mode +.4byte UART_IIR, 0xC1, read08_test # FIFO has not reached trigger level +.4byte UART_THR, 0x00, write08_test # Write 0 to transmit register +.4byte 0x0, 0xC401, uart_data_wait # Interrupt due to trigger threshold reached +.4byte UART_FCR, 0x41, write08_test # Set FIFO trigger threshold to 4 +.4byte UART_IIR, 0xC1, read08_test # FIFO has not reached trigger threshold +.4byte UART_THR, 0x01, write08_test # Write 1 to transmit register +.4byte UART_THR, 0x02, write08_test # Write 2 to transmit register +.4byte 0x0, 0xC101, uart_data_wait # FIFO has not reached trigger threshold +.4byte UART_THR, 0x03, write08_test # Write 3 to transmit register +.4byte 0x0, 0xC401, uart_data_wait # Interrupt due to trigger threshold reached +.4byte UART_FCR, 0x81, write08_test # Set FIFO trigger threshold to 8 +.4byte UART_IIR, 0xC1, read08_test # FIFO has not reached trigger threshold +.4byte UART_THR, 0x04, write08_test # Write 4 to transmit register +.4byte UART_THR, 0x05, write08_test # Write 5 to transmit register +.4byte UART_THR, 0x06, write08_test # Write 6 to transmit register +.4byte 0x0, 0xC101, uart_data_wait # FIFO has not reached trigger threshold +.4byte UART_THR, 0x07, write08_test # Write 7 to transmit register +.4byte 0x0, 0xC401, uart_data_wait # Interrupt due to trigger threshold reached +.4byte UART_FCR, 0xC1, write08_test # Set FIFO trigger threshold to 14 +.4byte UART_IIR, 0xC1, read08_test # FIFO has not reached trigger threshold +.4byte UART_THR, 0x08, write08_test # Write 8 to transmit register +.4byte UART_THR, 0x09, write08_test # Write 9 to transmit register +.4byte UART_THR, 0x0A, write08_test # Write 10 to transmit register +.4byte UART_THR, 0x0B, write08_test # Write 11 to transmit register +.4byte UART_THR, 0x0C, write08_test # Write 12 to transmit register +.4byte 0x0, 0xC101, uart_data_wait # FIFO has not reached trigger threshold +.4byte UART_THR, 0x0D, write08_test # Write 13 to transmit register +.4byte 0x0, 0xC401, uart_data_wait # Interrupt due to trigger threshold reached +.4byte UART_THR, 0x0E, write08_test # Write 14 to transmit register +.4byte UART_THR, 0x0F, write08_test # Write 15 to transmit register +.4byte 0x0, 0xC401, uart_data_wait +.4byte UART_LSR, 0x61, read08_test # FIFO contains data, no overrun error +.4byte UART_THR, 0x10, write08_test # Write 16 to transmit register, filling RX shift register +.4byte UART_THR, 0x11, write08_test # Write 17 to transmit register, destroying contents held in shift register +.4byte 0x0, 0x06, uart_lsr_intr_wait # Wait for LSR interrupt ID +.4byte UART_LSR, 0x63, read08_test # Read overrun error from LSR +.4byte UART_RBR, 0x00, read08_test # Read 0 from FIFO +.4byte UART_RBR, 0x01, read08_test # Read 1 from FIFO +.4byte UART_THR, 0x12, write08_test # Write 18 to transmit register, showing space in FIFO +.4byte 0x0, 0x0401, uart_data_wait +.4byte UART_RBR, 0x02, read08_test # Read 2 from FIFO +.4byte UART_RBR, 0x03, read08_test # Read 3 from FIFO +.4byte UART_RBR, 0x04, read08_test # Read 4 from FIFO +.4byte UART_RBR, 0x05, read08_test # Read 5 from FIFO +.4byte UART_RBR, 0x06, read08_test # Read 6 from FIFO +.4byte UART_RBR, 0x07, read08_test # Read 7 from FIFO +.4byte UART_RBR, 0x08, read08_test # Read 8 from FIFO +.4byte UART_RBR, 0x09, read08_test # Read 9 from FIFO +.4byte UART_RBR, 0x0A, read08_test # Read 10 from FIFO +.4byte UART_RBR, 0x0B, read08_test # Read 11 from FIFO +.4byte UART_RBR, 0x0C, read08_test # Read 12 from FIFO +.4byte UART_RBR, 0x0D, read08_test # Read 13 from FIFO +.4byte UART_RBR, 0x0E, read08_test # Read 14 from FIFO +.4byte UART_RBR, 0x0F, read08_test # Read 15 from FIFO .4byte 0x0, 0x0, terminate_test \ No newline at end of file From 7348af7fd51d72293ccd01b5e6e3321e10c06f51 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 26 Jul 2022 09:39:31 -0700 Subject: [PATCH 112/138] Updated reference file for UART test --- .../references/WALLY-uart-01.reference_output | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output index cfe54cb1c..f84f750ff 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -51,5 +51,38 @@ ffffffC1 0000C101 ffffffC1 00000060 +ffffffC1 # FIFO filling/overrun test +0000C401 # Threshold = 1 +ffffffC1 # Threshold = 4 +0000C101 +0000C401 +ffffffC1 # Threshold = 8 +0000C101 +0000C401 +ffffffC1 # Threshold = 14 +0000C101 +0000C401 +0000C401 +00000061 +00000006 +00000063 +00000000 +00000001 +0000C401 +00000061 +00000002 +00000003 +00000004 +00000005 +00000006 +00000007 +00000008 +00000009 +0000000A +0000000B +0000000C +0000000D +0000000E +0000000F 0000000b # ecall from test termination From 75a265159ba398536b7477589e548a2d400c67c5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 27 Jul 2022 04:05:21 +0000 Subject: [PATCH 113/138] Increased timeout threshold to avoid timeout building riscof tests on slow machine --- pipelined/testbench/testbench.sv | 4 ++-- tests/riscof/Makefile | 4 ++-- tests/riscof/sail_cSim/riscof_sail_cSim.py | 2 +- tests/riscof/spike/riscof_spike.py | 5 ++++- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 92971f5c4..3aa123fc3 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -89,8 +89,8 @@ logic [3:0] dummy; if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; else tests = {arch64c}; "arch64m": if (`M_SUPPORTED) tests = arch64m; - "arch64f": if (`D_SUPPORTED) tests = arch64f; - "arch32f": if (`F_SUPPORTED) tests = arch32f; // 32-bit FP tests r "arch64d": if (`D_SUPPORTED) tests = arch64d; + "arch64f": if (`F_SUPPORTED) tests = arch64f; + "arch64d": if (`D_SUPPORTED) tests = arch64d; "imperas64i": tests = imperas64i; "imperas64f": if (`F_SUPPORTED) tests = imperas64f; "imperas64d": if (`D_SUPPORTED) tests = imperas64d; diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index c71ce4473..63f949006 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,8 +8,8 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) #XLEN ?= 64 -all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 -#all: root fsd_fld_tempfix arch64 +#all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 +all: root fsd_fld_tempfix wally32 root: mkdir -p $(work_dir) diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py index 9a548b7ad..aa32757ca 100644 --- a/tests/riscof/sail_cSim/riscof_sail_cSim.py +++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py @@ -129,4 +129,4 @@ class sail_cSim(pluginTemplate): make.add_target(execute) # make.execute_all(self.work_dir) # DH 7/26/22 increase timeout so sim will finish on slow machines - make.execute_all(self.work_dir, timeout = 600) + make.execute_all(self.work_dir, timeout = 1800) diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index 4f74c72f4..d7d65c0b3 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -180,7 +180,10 @@ class spike(pluginTemplate): # once the make-targets are done and the makefile has been created, run all the targets in # parallel using the make command set above. - make.execute_all(self.work_dir) + #make.execute_all(self.work_dir) + # DH 7/26/22 increase timeout to 1800 seconds so sim will finish on slow machines + make.execute_all(self.work_dir, timeout = 1800) + # if target runs are not required then we simply exit as this point after running all # the makefile targets. From b08c87cb47c416db1864739b2431d76ef8aaa431 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 27 Jul 2022 04:06:59 +0000 Subject: [PATCH 114/138] Finished UART test --- .../references/WALLY-uart-01.reference_output | 27 +++---------------- .../rv32i_m/privilege/src/WALLY-uart-01.S | 22 ++------------- 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output index f84f750ff..f998d4660 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -62,27 +62,8 @@ ffffffC1 # Threshold = 8 ffffffC1 # Threshold = 14 0000C101 0000C401 -0000C401 -00000061 -00000006 -00000063 -00000000 -00000001 -0000C401 -00000061 -00000002 -00000003 -00000004 -00000005 -00000006 -00000007 -00000008 -00000009 -0000000A -0000000B -0000000C -0000000D -0000000E -0000000F - +0000C201 +00000061 # FIFO has data, no overrun +00000006 # wait for interrupt +000000A3 # FIFO overrun error 0000000b # ecall from test termination diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S index 603542fde..86d1eb51a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -216,29 +216,11 @@ test_cases: .4byte 0x0, 0xC401, uart_data_wait # Interrupt due to trigger threshold reached .4byte UART_THR, 0x0E, write08_test # Write 14 to transmit register .4byte UART_THR, 0x0F, write08_test # Write 15 to transmit register -.4byte 0x0, 0xC401, uart_data_wait +.4byte 0x0, 0xC201, uart_data_wait .4byte UART_LSR, 0x61, read08_test # FIFO contains data, no overrun error .4byte UART_THR, 0x10, write08_test # Write 16 to transmit register, filling RX shift register .4byte UART_THR, 0x11, write08_test # Write 17 to transmit register, destroying contents held in shift register .4byte 0x0, 0x06, uart_lsr_intr_wait # Wait for LSR interrupt ID -.4byte UART_LSR, 0x63, read08_test # Read overrun error from LSR -.4byte UART_RBR, 0x00, read08_test # Read 0 from FIFO -.4byte UART_RBR, 0x01, read08_test # Read 1 from FIFO -.4byte UART_THR, 0x12, write08_test # Write 18 to transmit register, showing space in FIFO -.4byte 0x0, 0x0401, uart_data_wait -.4byte UART_RBR, 0x02, read08_test # Read 2 from FIFO -.4byte UART_RBR, 0x03, read08_test # Read 3 from FIFO -.4byte UART_RBR, 0x04, read08_test # Read 4 from FIFO -.4byte UART_RBR, 0x05, read08_test # Read 5 from FIFO -.4byte UART_RBR, 0x06, read08_test # Read 6 from FIFO -.4byte UART_RBR, 0x07, read08_test # Read 7 from FIFO -.4byte UART_RBR, 0x08, read08_test # Read 8 from FIFO -.4byte UART_RBR, 0x09, read08_test # Read 9 from FIFO -.4byte UART_RBR, 0x0A, read08_test # Read 10 from FIFO -.4byte UART_RBR, 0x0B, read08_test # Read 11 from FIFO -.4byte UART_RBR, 0x0C, read08_test # Read 12 from FIFO -.4byte UART_RBR, 0x0D, read08_test # Read 13 from FIFO -.4byte UART_RBR, 0x0E, read08_test # Read 14 from FIFO -.4byte UART_RBR, 0x0F, read08_test # Read 15 from FIFO +.4byte UART_LSR, 0xA3, read08_test # Read overrun error from LSR .4byte 0x0, 0x0, terminate_test \ No newline at end of file From 429bdae1c47c9a85374462895a35a3a07858f802 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 27 Jul 2022 22:16:38 +0000 Subject: [PATCH 115/138] Fixed UART reference output --- .../rv32i_m/privilege/references/WALLY-uart-01.reference_output | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output index f998d4660..e93b059a0 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -65,5 +65,5 @@ ffffffC1 # Threshold = 14 0000C201 00000061 # FIFO has data, no overrun 00000006 # wait for interrupt -000000A3 # FIFO overrun error +ffffffA3 # FIFO overrun error 0000000b # ecall from test termination From 93d7d7179e130adff899bbba0059b8db97f8d89a Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 28 Jul 2022 04:35:51 +0000 Subject: [PATCH 116/138] Added parity and stop bit tests to UART --- pipelined/src/uncore/uartPC16550D.sv | 6 ++-- .../references/WALLY-uart-01.reference_output | 8 ++++- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 6 ++-- .../rv32i_m/privilege/src/WALLY-uart-01.S | 32 ++++++++++++++++--- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index dcb04883f..a55350c6a 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -328,9 +328,9 @@ module uartPC16550D( rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; end else if (rxstate == UART_DONE) begin RXBR <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; // load recevive buffer register - if (rxoverrunerr) $warning("UART RX Overrun Error\n"); - if (rxparityerr) $warning("UART RX Parity Error\n"); - if (rxframingerr) $warning("UART RX Framing Error\n"); + if (rxoverrunerr) $warning("UART RX Overrun Err\n"); + if (rxparityerr) $warning("UART RX Parity Err\n"); + if (rxframingerr) $warning("UART RX Framing Err\n"); if (fifoenabled) begin rxfifo[rxfifohead] <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; rxfifohead <= #1 rxfifohead + 1; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output index e93b059a0..7b4c916c7 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -16,6 +16,12 @@ 0000007F 00000101 # Transmit 8 bits ffffff80 +00000101 # Odd parity +00000079 +00000101 # Even parity +0000006A +00000101 # Extra stop bit +0000005B 00000002 # Transmission interrupt tests 00000401 # Interrupt generated by finished transmission 00000004 @@ -62,7 +68,7 @@ ffffffC1 # Threshold = 8 ffffffC1 # Threshold = 14 0000C101 0000C401 -0000C201 +0000C101 00000061 # FIFO has data, no overrun 00000006 # wait for interrupt ffffffA3 # FIFO overrun error diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index db123f078..7146e8334 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -1072,9 +1072,9 @@ uart_data_wait: li t3, 0x10000002 // IIR li a4, 0x61 uart_read_LSR_IIR: - lb t4, 0(t3) // save IIR before potential clear - lb t5, 0(t2) - andi t6, t5, 0x61 // only care if all transmissions are done + lb t4, 0(t3) // save IIR before reading LSR mgith clear it + lb t5, 0(t2) // read LSR + andi t6, t5, 0x61 // wait until all transmissions are done and data is ready bne a4, t6, uart_read_LSR_IIR uart_data_ready: diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S index 86d1eb51a..deba11c29 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -71,14 +71,14 @@ test_cases: .4byte UART_IER, 0x00, read08_test .4byte UART_IIR, 0x01, read08_test # IIR resets to 1 -# .4byte UART_LCR, 0x00, read08_test *** commented out because LCR should reset to zero but resets to 3 +# .4byte UART_LCR, 0x00, read08_test *** commented out because LCR should reset to zero but resets to 3 to help Linux boot .4byte UART_MCR, 0x00, read08_test .4byte UART_LSR, 0x60, read08_test # LSR resets with transmit status bits set .4byte UART_MSR, 0x00, read04_test # =========== Basic read-write =========== -.4byte UART_LCR, 0x00, write08_test # set LCR to reset value *** remove if UART resets to correct value +.4byte UART_LCR, 0x00, write08_test # set LCR to initial value .4byte UART_MCR, 0x10, write08_test # put UART into loopback for MSR test .4byte UART_LSR, 0x60, read08_test .4byte UART_THR, 0x00, write08_test # write value to UART @@ -112,11 +112,33 @@ test_cases: # Transmit 8 bits -.4byte UART_LCR, 0x03, write08_test # set LCR to transmit seven bits +.4byte UART_LCR, 0x03, write08_test # set LCR to transmit eight bits .4byte UART_THR, 0x80, write08_test # write value to UART .4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR .4byte UART_RBR, 0x80, read08_test # read full written value + sign extension +# Check function with odd parity + +.4byte UART_LCR, 0x0B, write08_test # set LCR to transmit 8 bits + odd partiy +.4byte UART_THR, 0x79, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x79, read08_test # read full written value + +# Check function with even parity + +.4byte UART_LCR, 0x1B, write08_test # set LCR to transmit 8 bits + even parity +.4byte UART_THR, 0x6A, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x6A, read08_test # read full written value + +# Check function with extra stop bit + +.4byte UART_LCR, 0x07, write08_test # set LCR to transmit 8 bits + extra stop +.4byte UART_THR, 0x5B, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x5B, read08_test # read full written value +.4byte UART_LCR, 0x03, write08_test # set LCR to transmit 8 bits + no extra stop bit + # =========== Transmit-related interrupts =========== .4byte UART_IER, 0x07, write08_test # enable data available, buffer empty, and line status interrupts @@ -126,7 +148,7 @@ test_cases: .4byte UART_THR, 0x01, write08_test # write 1 to transmitter buffer .4byte UART_IIR, 0x04, read08_test # data interrupt should still be high .4byte 0x0, 0x06, uart_lsr_intr_wait # wait for transmission to complete, IIR should throw error due to overrun error. -.4byte UART_LSR, 0x63, read08_test # read overrun error from LSR +.4byte UART_LSR, 0x23, read08_test # read overrun error from LSR .4byte UART_IIR, 0x04, read08_test # check that LSR interrupt was cleared .4byte UART_RBR, 0x01, read08_test # read previous value from UART @@ -216,7 +238,7 @@ test_cases: .4byte 0x0, 0xC401, uart_data_wait # Interrupt due to trigger threshold reached .4byte UART_THR, 0x0E, write08_test # Write 14 to transmit register .4byte UART_THR, 0x0F, write08_test # Write 15 to transmit register -.4byte 0x0, 0xC201, uart_data_wait +.4byte 0x0, 0xC101, uart_data_wait .4byte UART_LSR, 0x61, read08_test # FIFO contains data, no overrun error .4byte UART_THR, 0x10, write08_test # Write 16 to transmit register, filling RX shift register .4byte UART_THR, 0x11, write08_test # Write 17 to transmit register, destroying contents held in shift register From 1bd6351e1fd4b0f2ad0e248cd7999d2e098ba189 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 29 Jul 2022 22:54:49 +0000 Subject: [PATCH 117/138] re-added FStore2 in Cache --- pipelined/src/cache/cache.sv | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index d28697e21..609810e8c 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -162,12 +162,18 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic [LINELEN-1:0] FinalWriteDataDup; assign FinalWriteDataDup = {WORDSPERLINE{FinalWriteData}}; - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + if(`LLEN>`XLEN)begin + logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); + assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0}; + end else + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); for(index = 0; index < 2**LOGWPL; index++) begin assign DemuxedByteMask[(index+1)*(`XLEN/8)-1:index*(`XLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; end - // *** have to add back in fstore2 + assign LineByteMux = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes. From 257107f908519683bdadd5c9faf8e9b26c0957ee Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 18:07:38 +0000 Subject: [PATCH 118/138] Partitioned fma into separate files --- pipelined/regression/sim-wally-batch | 2 +- pipelined/src/fpu/fma.sv | 231 +-------------------------- pipelined/src/fpu/fmaadd.sv | 83 ++++++++++ pipelined/src/fpu/fmaalign.sv | 101 ++++++++++++ pipelined/src/fpu/fmaexpadd.sv | 42 +++++ pipelined/src/fpu/fmalza.sv | 62 +++++++ pipelined/src/fpu/fmamult.sv | 38 +++++ pipelined/src/fpu/fmasign.sv | 47 ++++++ pipelined/testbench/tests.vh | 3 +- 9 files changed, 384 insertions(+), 225 deletions(-) create mode 100644 pipelined/src/fpu/fmaadd.sv create mode 100644 pipelined/src/fpu/fmaalign.sv create mode 100644 pipelined/src/fpu/fmaexpadd.sv create mode 100644 pipelined/src/fpu/fmalza.sv create mode 100644 pipelined/src/fpu/fmamult.sv create mode 100644 pipelined/src/fpu/fmasign.sv diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch index 8b5b5d628..7afcadb2e 100755 --- a/pipelined/regression/sim-wally-batch +++ b/pipelined/regression/sim-wally-batch @@ -1 +1 @@ -vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph" +vsim -c -do "do wally-pipelined-batch.do rv64gc arch64d" diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 067147ee6..fcf209f6b 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -1,7 +1,7 @@ /////////////////////////////////////////// // -// Written: me@KatherineParry.com, David Harris -// Modified: 6/23/2021 +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: // // Purpose: Floating point multiply-accumulate of configurable size // @@ -63,18 +63,18 @@ module fma( // calculate the product's exponent - expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe); + fmaexpadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe); // multiplication of the mantissa's - mult mult(.Xm, .Ym, .Pm); + fmamult mult(.Xm, .Ym, .Pm); /////////////////////////////////////////////////////////////////////////////// // Alignment shifter /////////////////////////////////////////////////////////////////////////////// // calculate the signs and take the opperation into account - sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); - align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, + fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ZmSticky, .KillProd); @@ -83,223 +83,8 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); + fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); + fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); endmodule - -module expadd( - input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad - input logic [`NE-1:0] Xe, Ye, // input's exponents - input logic XZero, YZero, // are the inputs zero - output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 -); - - // kill the exponent if the product is zero - either X or Y is 0 - assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}}; - -endmodule - - - - - -module mult( - input logic [`NF:0] Xm, Ym, - output logic [2*`NF+1:0] Pm -); - assign Pm = Xm * Ym; -endmodule - - - - - - - - -module sign( - input logic [2:0] OpCtrl, // opperation contol - input logic Xs, Ys, Zs, // sign of the inputs - output logic Ps, // the product's sign - takes opperation into account - output logic As // aligned addend sign used in fma - takes opperation into account -); - - // Calculate the product's sign - // Negate product's sign if FNMADD or FNMSUB - - // flip is negation opperation - assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); - // flip if subtraction - assign As = Zs^OpCtrl[0]; - -endmodule - - - - - - - - -module align( - input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format - input logic [`NF:0] Zm, // significand in U(0.NF) format] - input logic XZero, YZero, ZZero, // is the input zero - output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) - output logic ZmSticky, // Sticky bit calculated from the aliged addend - output logic KillProd // should the product be set to zero -); - - logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) - logic KillZ; - - /////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// - - // determine the shift count for alignment - // - negitive means Z is larger, so shift Z left - // - positive means the product is larger, so shift Z right - // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed - assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; - - // Defualt Addition without shifting - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - - // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; - - assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; - assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); - - always_comb - begin - - // If the product is too small to effect the sum, kill the product - - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - if (KillProd) begin - ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; - ZmSticky = ~(XZero|YZero); - - // If the addend is too small to effect the addition - // - The addend has to shift two past the end of the addend to be considered too small - // - The 2 extra bits are needed for rounding - - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if (KillZ) begin - ZmShifted = 0; - ZmSticky = ~ZZero; - - // If the Addend is shifted right - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else begin - ZmShifted = ZmPreshifted >> ACnt; - ZmSticky = |(ZmShifted[`NF-1:0]); - - end - end - - assign Am = ZmShifted[4*`NF+5:`NF]; - -endmodule - - - - - - - -module add( - input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) - input logic [2*`NF+1:0] Pm, // the product's mantissa - input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) - input logic KillProd, // should the product be set to 0 - input logic ZmSticky, - input logic [`NE-1:0] Ze, - input logic [`NE+1:0] Pe, - output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted - output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed - output logic NegSum, // was the sum negitive - output logic InvA, // do you invert the aligned addend - output logic Ss, - output logic [`NE+1:0] Se, - output logic [3*`NF+5:0] Sm // the positive sum -); - logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum - - /////////////////////////////////////////////////////////////////////////////// - // Addition - /////////////////////////////////////////////////////////////////////////////// - - // Negate Z when doing one of the following opperations: - // -prod + Z - // prod - Z - assign InvA = As ^ Ps; - - // Choose an inverted or non-inverted addend - the one has to be added now for the LZA - assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; - // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = Pm&{2*`NF+2{~KillProd}}; - // Do the addition - // - calculate a positive and negitive sum in parallel - // Zsticky Psticky - // PreSum -1 = don't add 1 +1 = add 2 - // NegPreSum +1 = add 2 -1 = don't add 1 - // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; - - // Is the sum negitive - assign NegSum = PreSum[3*`NF+6]; - - // Choose the positive sum and accompanying LZA result. - assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign Ss = NegSum^Ps; //*** move to execute stage - assign Se = KillProd ? {2'b0, Ze} : Pe; -endmodule - - -module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] - input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+3:0] P, // product - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result - ); - - logic [3*`NF+6:0] T; - logic [3*`NF+6:0] G; - logic [3*`NF+6:0] Z; - logic [3*`NF+6:0] f; - - assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; - assign G[3*`NF+6:2*`NF+4] = 0; - assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; - assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; - assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; - - - // Apply function to determine Leading pattern - // - note: the paper linked above uses the numbering system where 0 is the most significant bit - //f[n] = ~T[n]&T[n-1] note: n is the MSB - //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) - assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; - assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); - - - - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); - -endmodule diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv new file mode 100644 index 000000000..4b52208c6 --- /dev/null +++ b/pipelined/src/fpu/fmaadd.sv @@ -0,0 +1,83 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA significand adder +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmaadd( + input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [2*`NF+1:0] Pm, // the product's mantissa + input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) + input logic KillProd, // should the product be set to 0 + input logic ZmSticky, + input logic [`NE-1:0] Ze, + input logic [`NE+1:0] Pe, + output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted + output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed + output logic NegSum, // was the sum negitive + output logic InvA, // do you invert the aligned addend + output logic Ss, + output logic [`NE+1:0] Se, + output logic [3*`NF+5:0] Sm // the positive sum +); + logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum + + /////////////////////////////////////////////////////////////////////////////// + // Addition + /////////////////////////////////////////////////////////////////////////////// + + // Negate Z when doing one of the following opperations: + // -prod + Z + // prod - Z + assign InvA = As ^ Ps; + + // Choose an inverted or non-inverted addend - the one has to be added now for the LZA + assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; + // Kill the product if the product is too small to effect the addition (determined in fma1.sv) + assign PmKilled = Pm&{2*`NF+2{~KillProd}}; + // Do the addition + // - calculate a positive and negitive sum in parallel + // Zsticky Psticky + // PreSum -1 = don't add 1 +1 = add 2 + // NegPreSum +1 = add 2 -1 = don't add 1 + // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 + assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; + + // Is the sum negitive + assign NegSum = PreSum[3*`NF+6]; + + // Choose the positive sum and accompanying LZA result. + assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign Ss = NegSum^Ps; //*** move to execute stage + assign Se = KillProd ? {2'b0, Ze} : Pe; +endmodule diff --git a/pipelined/src/fpu/fmaalign.sv b/pipelined/src/fpu/fmaalign.sv new file mode 100644 index 000000000..f7c849993 --- /dev/null +++ b/pipelined/src/fpu/fmaalign.sv @@ -0,0 +1,101 @@ + +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA alginment shift +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmaalign( + input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format + input logic [`NF:0] Zm, // significand in U(0.NF) format] + input logic XZero, YZero, ZZero, // is the input zero + output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic ZmSticky, // Sticky bit calculated from the aliged addend + output logic KillProd // should the product be set to zero +); + + logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format + logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic KillZ; + + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// + + // determine the shift count for alignment + // - negitive means Z is larger, so shift Z left + // - positive means the product is larger, so shift Z right + // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; + + // Defualt Addition without shifting + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + + // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) + assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; + + assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; + assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); + + always_comb + begin + + // If the product is too small to effect the sum, kill the product + + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + if (KillProd) begin + ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; + ZmSticky = ~(XZero|YZero); + + // If the addend is too small to effect the addition + // - The addend has to shift two past the end of the addend to be considered too small + // - The 2 extra bits are needed for rounding + + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if (KillZ) begin + ZmShifted = 0; + ZmSticky = ~ZZero; + + // If the Addend is shifted right + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else begin + ZmShifted = ZmPreshifted >> ACnt; + ZmSticky = |(ZmShifted[`NF-1:0]); + + end + end + + assign Am = ZmShifted[4*`NF+5:`NF]; + +endmodule + diff --git a/pipelined/src/fpu/fmaexpadd.sv b/pipelined/src/fpu/fmaexpadd.sv new file mode 100644 index 000000000..1d208327b --- /dev/null +++ b/pipelined/src/fpu/fmaexpadd.sv @@ -0,0 +1,42 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA exponent addition +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmaexpadd( + input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad + input logic [`NE-1:0] Xe, Ye, // input's exponents + input logic XZero, YZero, // are the inputs zero + output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 +); + + // kill the exponent if the product is zero - either X or Y is 0 + assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}}; + +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv new file mode 100644 index 000000000..3baaf2a08 --- /dev/null +++ b/pipelined/src/fpu/fmalza.sv @@ -0,0 +1,62 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: Leading Zero Anticipator +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] + input logic [3*`NF+6:0] A, // addend + input logic [2*`NF+3:0] P, // product + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result + ); + + logic [3*`NF+6:0] T; + logic [3*`NF+6:0] G; + logic [3*`NF+6:0] Z; + logic [3*`NF+6:0] f; + + assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; + assign G[3*`NF+6:2*`NF+4] = 0; + assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; + assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; + assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; + assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; + + + // Apply function to determine Leading pattern + // - note: the paper linked above uses the numbering system where 0 is the most significant bit + //f[n] = ~T[n]&T[n-1] note: n is the MSB + //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) + assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; + assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + + + + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); + +endmodule diff --git a/pipelined/src/fpu/fmamult.sv b/pipelined/src/fpu/fmamult.sv new file mode 100644 index 000000000..1e1b0981e --- /dev/null +++ b/pipelined/src/fpu/fmamult.sv @@ -0,0 +1,38 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA Significand Multiplier +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmamult( + input logic [`NF:0] Xm, Ym, + output logic [2*`NF+1:0] Pm +); + assign Pm = Xm * Ym; +endmodule + diff --git a/pipelined/src/fpu/fmasign.sv b/pipelined/src/fpu/fmasign.sv new file mode 100644 index 000000000..66c1af83a --- /dev/null +++ b/pipelined/src/fpu/fmasign.sv @@ -0,0 +1,47 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA Sign Logic +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmasign( + input logic [2:0] OpCtrl, // opperation contol + input logic Xs, Ys, Zs, // sign of the inputs + output logic Ps, // the product's sign - takes opperation into account + output logic As // aligned addend sign used in fma - takes opperation into account +); + + // Calculate the product's sign + // Negate product's sign if FNMADD or FNMSUB + + // flip is negation opperation + assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); + // flip if subtraction + assign As = Zs^OpCtrl[0]; + +endmodule diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 587733c39..fe3bd62f4 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1902,7 +1902,8 @@ string imperas32f[] = '{ "rv32i_m/privilege/src/WALLY-gpio-01.S", "rv32i_m/privilege/src/WALLY-clint-01.S", "rv32i_m/privilege/src/WALLY-uart-01.S", - "rv32i_m/privilege/src/WALLY-plic-01.S" + "rv32i_m/privilege/src/WALLY-plic-01.S", + "rv32i_m/privilege/src/WALLY-plic-s-01.S" }; From 7f9b6014670f4f866c1f83e6c9d43a52fe22a239 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 18:23:39 +0000 Subject: [PATCH 119/138] fmalza edits to match textbook --- pipelined/src/fpu/fma.sv | 2 +- pipelined/src/fpu/fmalza.sv | 33 ++++++++++++++------------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index fcf209f6b..5f595b1fc 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,6 +85,6 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); + fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index 3baaf2a08..a05084e2d 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -31,32 +31,27 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+3:0] P, // product + input logic [2*`NF+3:0] Pm, // product output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); + + localparam WIDTH = 3*`NF+7; - logic [3*`NF+6:0] T; - logic [3*`NF+6:0] G; - logic [3*`NF+6:0] Z; - logic [3*`NF+6:0] f; + logic [WIDTH-1:0] B, P, G, K, F; + logic [WIDTH-1:0] Pp1, Gm1, Km1; - assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; - assign G[3*`NF+6:2*`NF+4] = 0; - assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; - assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; - assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; + assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit - //f[n] = ~T[n]&T[n-1] note: n is the MSB - //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) - assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; - assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + //f[n] = ~P[n]&P[n-1] note: n is the MSB + //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) + assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; + assign F[WIDTH-2:0] = (P[3*`NF+6:1]&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); - - - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); - + lzc #(3*`NF+7) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 3c08aabcd355cce36a39bad253e1f57a3bfc0464 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 11:36:21 -0700 Subject: [PATCH 120/138] LZA refactoring --- pipelined/src/fpu/fma.sv | 2 +- pipelined/src/fpu/fmalza.sv | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 5f595b1fc..e698cdaf2 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,6 +85,6 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index a05084e2d..e69ba73f4 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -30,28 +30,34 @@ `include "wally-config.vh" module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] - input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+3:0] Pm, // product - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result + input logic [3*`NF+6:0] A, // addend + input logic [2*`NF+3:0] Pm, // product + input logic Cin, // carry in + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); localparam WIDTH = 3*`NF+7; - logic [WIDTH-1:0] B, P, G, K, F; - logic [WIDTH-1:0] Pp1, Gm1, Km1; + logic [WIDTH-1:0] AA, B, P, G, K, F; + logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product + assign AA = A + Cin; - assign P = A^B; - assign G = A&B; - assign K= ~A&~B; + assign P = AA^B; + assign G = AA&B; + assign K= ~AA&~B; + assign Pp1 = P[WIDTH-1:1]; + assign Gm1 = {G[WIDTH-3:0], Cin}; + assign Km1 = {K[WIDTH-3:0], ~Cin}; + // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (P[3*`NF+6:1]&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + assign F[WIDTH-2:0] = (Pp1&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); - lzc #(3*`NF+7) lzc (.num(F), .ZeroCnt(SCnt)); + lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 99462049e7998190cad96ff49234ee99c69634e8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:20:23 -0700 Subject: [PATCH 121/138] LZA refactoring switched to Pp1, Gm1, Km1 --- pipelined/src/fpu/fmalza.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index e69ba73f4..afffca472 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -42,11 +42,11 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product - assign AA = A + Cin; +// assign AA = A + Cin; - assign P = AA^B; - assign G = AA&B; - assign K= ~AA&~B; + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; assign Pp1 = P[WIDTH-1:1]; assign Gm1 = {G[WIDTH-3:0], Cin}; @@ -57,7 +57,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + assign F[WIDTH-2:0] = (Pp1&(G[WIDTH-2:0]&~Km1 | K[WIDTH-2:0]&~Gm1)) | (~Pp1&(K[WIDTH-2:0]&~Km1 | G[WIDTH-2:0]&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From b34d2065c392d60cc4b80bb2d5e4adba5b582fff Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:30:42 -0700 Subject: [PATCH 122/138] LZA cleanup --- pipelined/src/fpu/fmalza.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index afffca472..b7b40091d 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -38,7 +38,8 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; - logic [WIDTH-1:0] AA, B, P, G, K, F; + logic [WIDTH-1:0] B,F; + logic [WIDTH-1:0] P, G, K; logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product @@ -48,7 +49,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, assign G = A&B; assign K= ~A&~B; - assign Pp1 = P[WIDTH-1:1]; + assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; assign Km1 = {K[WIDTH-3:0], ~Cin}; From 2869d67e50c06e8746d90c10a3b65e401141467a Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:34:00 -0700 Subject: [PATCH 123/138] more lza cleanup --- pipelined/src/fpu/fmalza.sv | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index b7b40091d..f70b1bc93 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -39,15 +39,17 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; logic [WIDTH-1:0] B,F; - logic [WIDTH-1:0] P, G, K; + logic [WIDTH-1:0] P, G; + logic [WIDTH-2:0] K; logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product -// assign AA = A + Cin; + // next steps***replace P[WIDTH-1] with sub, then remove one bit from A + assign P = A^B; - assign G = A&B; - assign K= ~A&~B; + assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; + assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; @@ -58,7 +60,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G[WIDTH-2:0]&~Km1 | K[WIDTH-2:0]&~Gm1)) | (~Pp1&(K[WIDTH-2:0]&~Km1 | G[WIDTH-2:0]&~Gm1)); + assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 8ff3a693af891c99945ebac02205a2da7bb92ba5 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 1 Aug 2022 19:56:25 +0000 Subject: [PATCH 124/138] regression passes fpu tests --- pipelined/src/fpu/fpu.sv | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index d98079b2e..4b7a1ffea 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -319,10 +319,23 @@ module fpu ( assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU + + logic [`FLEN-1:0] SgnExtXE; + generate + if(`FPSIZES == 1) + assign SgnExtXE = XE; + else if(`FPSIZES == 2) + mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE); + else if(`FPSIZES == 3 | `FPSIZES == 4) + mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]}, + {{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]}, + {{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]}, + XE, FmtE, SgnExtXE); // NaN boxing zeroes + endgenerate if (`FLEN>`XLEN) - assign IntSrcXE = XE[`XLEN-1:0]; + assign IntSrcXE = SgnExtXE[`XLEN-1:0]; else - assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE}; + assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE}; mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok From d6b5e7a6ef60f3d1a45554319b932a904ebfd46f Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:37:09 -0700 Subject: [PATCH 125/138] lza cleanup --- pipelined/src/fpu/fma.sv | 3 ++- pipelined/src/fpu/fmalza.sv | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index e698cdaf2..68a509677 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,6 +85,7 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule + diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index f70b1bc93..9de1d745e 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -32,26 +32,27 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend input logic [2*`NF+3:0] Pm, // product - input logic Cin, // carry in + input logic Cin, // carry in + input logic sub, output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); localparam WIDTH = 3*`NF+7; logic [WIDTH-1:0] B,F; - logic [WIDTH-1:0] P, G; - logic [WIDTH-2:0] K; + logic [WIDTH-2:0] P, G, K; logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product // next steps***replace P[WIDTH-1] with sub, then remove one bit from A - assign P = A^B; + assign P = A[WIDTH-2:0]^B[WIDTH-2:0]; assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; - assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; + assign Pp1 = {sub, P[WIDTH-2:1]}; +// assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; assign Km1 = {K[WIDTH-3:0], ~Cin}; @@ -59,7 +60,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) - assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; + assign F[WIDTH-1] = ~sub&P[WIDTH-2]; assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); From c3e9719c991d2da019341cd46801decbcf8f8467 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:40:12 -0700 Subject: [PATCH 126/138] lza cleanup --- pipelined/src/fpu/fmalza.sv | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index 9de1d745e..c86459edb 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -39,14 +39,12 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; - logic [WIDTH-1:0] B,F; - logic [WIDTH-2:0] P, G, K; + logic [WIDTH-1:0] F; + logic [WIDTH-2:0] B, P, G, K; logic [WIDTH-2:0] Pp1, Gm1, Km1; - assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product + assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product - // next steps***replace P[WIDTH-1] with sub, then remove one bit from A - assign P = A[WIDTH-2:0]^B[WIDTH-2:0]; assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; From f56b26ec400add075116f44f61c2e8b0f1399d05 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:43:48 -0700 Subject: [PATCH 127/138] lza cleanup --- pipelined/src/fpu/fma.sv | 5 +++-- pipelined/src/fpu/fmalza.sv | 9 ++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 68a509677..dec492eba 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -84,8 +84,9 @@ module fma( // /////////////////////////////////////////////////////////////////////////////// fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - - fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + + + fmalza lza(.A(AmInv[3*`NF+5:0]), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index c86459edb..d70f0267c 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] - input logic [3*`NF+6:0] A, // addend + input logic [3*`NF+5:0] A, // addend input logic [2*`NF+3:0] Pm, // product input logic Cin, // carry in input logic sub, @@ -45,12 +45,11 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product - assign P = A[WIDTH-2:0]^B[WIDTH-2:0]; - assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; - assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; + assign P = A[WIDTH-2:0]^B; + assign G = A[WIDTH-2:0]&B; + assign K= ~A[WIDTH-2:0]&~B; assign Pp1 = {sub, P[WIDTH-2:1]}; -// assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; assign Km1 = {K[WIDTH-3:0], ~Cin}; From 91597bba87da84e99c683edfbd30937db56720ed Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:47:03 -0700 Subject: [PATCH 128/138] lza cleanup --- pipelined/src/fpu/fmalza.sv | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index d70f0267c..d71b398e7 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -37,28 +37,28 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); - localparam WIDTH = 3*`NF+7; + localparam WIDTH = 3*`NF+6; - logic [WIDTH-1:0] F; - logic [WIDTH-2:0] B, P, G, K; - logic [WIDTH-2:0] Pp1, Gm1, Km1; + logic [WIDTH:0] F; + logic [WIDTH-1:0] B, P, G, K; + logic [WIDTH-1:0] Pp1, Gm1, Km1; assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product - assign P = A[WIDTH-2:0]^B; - assign G = A[WIDTH-2:0]&B; - assign K= ~A[WIDTH-2:0]&~B; + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; - assign Pp1 = {sub, P[WIDTH-2:1]}; - assign Gm1 = {G[WIDTH-3:0], Cin}; - assign Km1 = {K[WIDTH-3:0], ~Cin}; + assign Pp1 = {sub, P[WIDTH-1:1]}; + assign Gm1 = {G[WIDTH-2:0], Cin}; + assign Km1 = {K[WIDTH-2:0], ~Cin}; // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) - assign F[WIDTH-1] = ~sub&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); + assign F[WIDTH] = ~sub&P[WIDTH-1]; + assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); - lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); + lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 3b937b73fdade1a6b1bf2a36233aa912db2f4414 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 16:01:02 -0700 Subject: [PATCH 129/138] lza cleanup --- pipelined/src/fpu/fmalza.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index d71b398e7..fd180fbb6 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -55,8 +55,6 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit - //f[n] = ~P[n]&P[n-1] note: n is the MSB - //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH] = ~sub&P[WIDTH-1]; assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); From 94fa7a00e7b28c1d9f32dd3d98e1579fe23917fe Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 16:13:16 -0700 Subject: [PATCH 130/138] Completed LZA simplificaiton --- pipelined/src/fpu/fma.sv | 4 ++-- pipelined/src/fpu/fmaadd.sv | 12 ++++++------ pipelined/src/fpu/fmalza.sv | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index dec492eba..950b55ff1 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -51,7 +51,7 @@ module fma( logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) - logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted + logic [3*`NF+5:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -86,7 +86,7 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv[3*`NF+5:0]), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv index 4b52208c6..56ce5a74e 100644 --- a/pipelined/src/fpu/fmaadd.sv +++ b/pipelined/src/fpu/fmaadd.sv @@ -37,7 +37,7 @@ module fmaadd( input logic ZmSticky, input logic [`NE-1:0] Ze, input logic [`NE+1:0] Pe, - output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted + output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend @@ -45,7 +45,7 @@ module fmaadd( output logic [`NE+1:0] Se, output logic [3*`NF+5:0] Sm // the positive sum ); - logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum + logic [3*`NF+5:0] PreSum, NegPreSum; // possibly negitive sum /////////////////////////////////////////////////////////////////////////////// // Addition @@ -57,7 +57,7 @@ module fmaadd( assign InvA = As ^ Ps; // Choose an inverted or non-inverted addend - the one has to be added now for the LZA - assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; + assign AmInv = InvA ? ~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign PmKilled = Pm&{2*`NF+2{~KillProd}}; // Do the addition @@ -66,11 +66,11 @@ module fmaadd( // PreSum -1 = don't add 1 +1 = add 2 // NegPreSum +1 = add 2 -1 = don't add 1 // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; + assign PreSum = {{`NF+2{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+5{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive - assign NegSum = PreSum[3*`NF+6]; + assign NegSum = PreSum[3*`NF+5]; // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index fd180fbb6..65fe94266 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -39,7 +39,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+6; - logic [WIDTH:0] F; + logic [WIDTH:0] F; logic [WIDTH-1:0] B, P, G, K; logic [WIDTH-1:0] Pp1, Gm1, Km1; From 7e4b04ff643624de43cdf64ea2144da46750324c Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 16:18:02 -0700 Subject: [PATCH 131/138] Parameterized fmalza --- pipelined/src/fpu/fma.sv | 4 ++-- pipelined/src/fpu/fmalza.sv | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 950b55ff1..0106af7d5 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,8 +85,8 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - - fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + + fmalza #(3*`NF+6) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index 65fe94266..8e92a5dc4 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -29,16 +29,14 @@ `include "wally-config.vh" -module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] - input logic [3*`NF+5:0] A, // addend +module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] + input logic [WIDTH-1:0] A, // addend input logic [2*`NF+3:0] Pm, // product input logic Cin, // carry in input logic sub, - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result + output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result ); - localparam WIDTH = 3*`NF+6; - logic [WIDTH:0] F; logic [WIDTH-1:0] B, P, G, K; logic [WIDTH-1:0] Pp1, Gm1, Km1; From 3612db2d70e5e927952f7a21bcd0ecffbb030293 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 1 Aug 2022 20:48:45 -0500 Subject: [PATCH 132/138] pulled swbbytemask out of subword write. --- pipelined/src/ebu/ahblite.sv | 4 +--- pipelined/src/lsu/lsu.sv | 12 +++++++++--- pipelined/src/lsu/subwordwrite.sv | 10 +--------- pipelined/src/wally/wallypipelinedcore.sv | 3 --- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/pipelined/src/ebu/ahblite.sv b/pipelined/src/ebu/ahblite.sv index a502bf9f4..4203f0f52 100644 --- a/pipelined/src/ebu/ahblite.sv +++ b/pipelined/src/ebu/ahblite.sv @@ -49,7 +49,6 @@ module ahblite ( input logic [2:0] IFUBurstType, input logic [1:0] IFUTransType, input logic IFUTransComplete, - input logic [(`XLEN-1)/8:0] ByteMaskM, // Signals from Data Cache input logic [`PA_BITS-1:0] LSUBusAdr, @@ -157,8 +156,7 @@ module ahblite ( assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise assign HMASTLOCK = 0; // no locking supported assign HWRITE = (NextBusState == MEMWRITE); - //assign HWSTRB = ByteMaskM; - // Byte mask for HWSTRB + // Byte mask for HWSTRB swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB)); // delay write data by one cycle for diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index e6f632640..540ff6b87 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -77,8 +77,6 @@ module lsu ( (* mark_debug = "true" *) output logic [2:0] LSUBurstType, (* mark_debug = "true" *) output logic [1:0] LSUTransType, (* mark_debug = "true" *) output logic LSUTransComplete, - output logic [(`XLEN-1)/8:0] ByteMaskM, - // page table walker input logic [`XLEN-1:0] SATP_REGW, // from csr input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, @@ -116,6 +114,7 @@ module lsu ( logic [`XLEN-1:0] LSUWriteDataM; logic [`XLEN-1:0] WriteDataM; logic [`LLEN-1:0] ReadDataM; + logic [(`XLEN-1)/8:0] ByteMaskM; // *** TO DO: Burst mode @@ -277,7 +276,14 @@ module lsu ( subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]), .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), - .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM); + .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM); + + // Compute byte masks + //swbytemask swbytemask(.Size(LSUFunct3M[1:0]), .Adr(LSUPAdrM[2:0]), .ByteMask(ByteMaskM)); + swbytemaskword #(`XLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`XLEN/8)-1:0]), .ByteMask(ByteMaskM)); + // *** fix me. + //swbytemaskword #(.WORDLEN(`XLEN)) + //swbytemaskword (.Size(LSUFunct3M[2:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM)); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv index b93062365..d42033ef7 100644 --- a/pipelined/src/lsu/subwordwrite.sv +++ b/pipelined/src/lsu/subwordwrite.sv @@ -34,16 +34,8 @@ module subwordwrite ( input logic [2:0] LSUPAdrM, input logic [2:0] LSUFunct3M, input logic [`XLEN-1:0] AMOWriteDataM, - output logic [`XLEN-1:0] LittleEndianWriteDataM, - output logic [`XLEN/8-1:0] ByteMaskM -); + output logic [`XLEN-1:0] LittleEndianWriteDataM); - // Compute byte masks - //swbytemask swbytemask(.Size(LSUFunct3M[1:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM)); - // *** fix me. - swbytemaskword #(.WORDLEN(`XLEN)) - swbytemaskword (.Size(LSUFunct3M[2:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM)); - // Replicate data for subword writes if (`XLEN == 64) begin:sww always_comb diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 372f4aba5..200789c41 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -116,7 +116,6 @@ module wallypipelinedcore ( logic [1:0] PageType; logic sfencevmaM, wfiM, IntPendingM; logic SelHPTW; - logic [`XLEN/8-1:0] ByteMaskM; // PMA checker signals @@ -266,7 +265,6 @@ module wallypipelinedcore ( // connected to ahb (all stay the same) .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit, .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete, - .ByteMaskM, // connect to csr or privilege and stay the same. .PrivilegeModeW, .BigEndianM, // connects to csr @@ -313,7 +311,6 @@ module wallypipelinedcore ( .LSUTransComplete, .LSUBusAck, .LSUBusInit, - .ByteMaskM, .HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, From 3cd840491711c01b478e004421beb62a5c2f42a7 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 1 Aug 2022 21:06:36 -0500 Subject: [PATCH 133/138] Replaced LOGWPL with LOGBWPL (Bus words per line) and LOGCWPL (cache words per line). Replaced with wordlen/8 bytemask. --- pipelined/src/cache/cache.sv | 26 +++++++++++++------------ pipelined/src/cache/subcachelineread.sv | 2 +- pipelined/src/ifu/ifu.sv | 6 +++--- pipelined/src/lsu/lsu.sv | 18 +++++++---------- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index e168b2c15..15791c94f 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" -module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTERVAL, DCACHE) ( +module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) ( input logic clk, input logic reset, // cpu side @@ -41,7 +41,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic InvalidateCache, input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] PAdr, // physical address - input logic [(`XLEN-1)/8:0] ByteMask, + input logic [(WORDLEN-1)/8:0] ByteMask, input logic [WORDLEN-1:0] FinalWriteData, input logic FStore2, output logic CacheCommitted, @@ -58,7 +58,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER output logic CacheFetchLine, output logic CacheWriteLine, input logic CacheBusAck, - input logic [LOGWPL-1:0] WordCount, + input logic [LOGBWPL-1:0] WordCount, input logic LSUBusWriteCrit, output logic [`PA_BITS-1:0] CacheBusAdr, input logic [LINELEN-1:0] CacheBusWriteData, @@ -110,8 +110,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic SelBusBuffer; logic SRAMEnable; - localparam LOGXLENBYTES = $clog2(`XLEN/8); - logic [2**LOGWPL-1:0] MemPAdrDecoded; + localparam LOGLLENBYTES = $clog2(WORDLEN/8); + localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; + localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); + logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, LineByteMux; genvar index; @@ -145,14 +147,14 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // like to fix this. if(DCACHE) - mux2 #(LOGWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), + mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), .d1(WordCount), .s(LSUBusWriteCrit), .y(WordOffsetAddr)); else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; - mux2 #(LINELEN) EarlyReturnBuf(ReadDataLineCache, CacheBusWriteData, SelBusBuffer, ReadDataLine); + mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, CacheBusWriteData, SelBusBuffer, ReadDataLine); - subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL, LOGWPL) subcachelineread( + subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread( .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord); @@ -162,10 +164,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic [LINELEN-1:0] FinalWriteDataDup; assign FinalWriteDataDup = {WORDSPERLINE{FinalWriteData}}; - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); - for(index = 0; index < 2**LOGWPL; index++) begin - assign DemuxedByteMask[(index+1)*(`XLEN/8)-1:index*(`XLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; + onehotdecoder #(LOGCWPL) adrdec( + .bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); + for(index = 0; index < 2**LOGCWPL; index++) begin + assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; end // *** have to add back in fstore2 assign LineByteMux = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. diff --git a/pipelined/src/cache/subcachelineread.sv b/pipelined/src/cache/subcachelineread.sv index fe9139659..a9db47c13 100644 --- a/pipelined/src/cache/subcachelineread.sv +++ b/pipelined/src/cache/subcachelineread.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" -module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)( +module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)( input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr, input logic [LINELEN-1:0] ReadDataLine, output logic [WORDLEN-1:0] ReadDataWord); diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index af78d842d..4e7a427fb 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -196,13 +196,13 @@ module ifu ( if (`IBUS) begin : bus localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS : `XLEN; - localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; + localparam integer LOGBWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; logic [LINELEN-1:0] ICacheBusWriteData; logic [`PA_BITS-1:0] ICacheBusAdr; logic ICacheBusAck; logic SelUncachedAdr; - busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) + busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED) busdp(.clk, .reset, .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(), .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete), @@ -222,7 +222,7 @@ module ifu ( if(CACHE_ENABLED) begin : icache cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), - .NUMWAYS(`ICACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) + .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 540ff6b87..fe5447c4f 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -114,7 +114,7 @@ module lsu ( logic [`XLEN-1:0] LSUWriteDataM; logic [`XLEN-1:0] WriteDataM; logic [`LLEN-1:0] ReadDataM; - logic [(`XLEN-1)/8:0] ByteMaskM; + logic [(`LLEN-1)/8:0] ByteMaskM; // *** TO DO: Burst mode @@ -204,22 +204,22 @@ module lsu ( // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, - .DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM), + .DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM[`XLEN/8-1:0]), .Cacheable(CacheableM), .DCacheMiss, .DCacheAccess); end if (`DBUS) begin : bus localparam CACHE_ENABLED = `DMEM == `MEM_CACHE; localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS : `XLEN; - localparam integer LOGWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1; + localparam integer LOGBWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1; logic [LINELEN-1:0] DCacheBusWriteData; logic [`PA_BITS-1:0] DCacheBusAdr; logic DCacheWriteLine; logic DCacheFetchLine; logic DCacheBusAck; - logic [LOGWPL-1:0] WordCount; + logic [LOGBWPL-1:0] WordCount; - busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp( + busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED) busdp( .clk, .reset, .LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete, .WordCount, .LSUBusWriteCrit, @@ -239,7 +239,7 @@ module lsu ( else assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), - .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( + .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), .ByteMask(ByteMaskM), .WordCount, .FStore2, @@ -279,11 +279,7 @@ module lsu ( .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM); // Compute byte masks - //swbytemask swbytemask(.Size(LSUFunct3M[1:0]), .Adr(LSUPAdrM[2:0]), .ByteMask(ByteMaskM)); - swbytemaskword #(`XLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`XLEN/8)-1:0]), .ByteMask(ByteMaskM)); - // *** fix me. - //swbytemaskword #(.WORDLEN(`XLEN)) - //swbytemaskword (.Size(LSUFunct3M[2:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM)); + swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register From 797d9e36107a305d75696597d07acf608afee248 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 1 Aug 2022 21:12:25 -0500 Subject: [PATCH 134/138] Replaced swbytemask with swbytemaskword (1 liner). Credit to David Harris. --- pipelined/src/ebu/ahblite.sv | 2 +- pipelined/src/lsu/lsu.sv | 2 +- pipelined/src/lsu/swbytemask.sv | 26 ++++++++++---------------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pipelined/src/ebu/ahblite.sv b/pipelined/src/ebu/ahblite.sv index 4203f0f52..be7f789e4 100644 --- a/pipelined/src/ebu/ahblite.sv +++ b/pipelined/src/ebu/ahblite.sv @@ -157,7 +157,7 @@ module ahblite ( assign HMASTLOCK = 0; // no locking supported assign HWRITE = (NextBusState == MEMWRITE); // Byte mask for HWSTRB - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB)); + swbytemask #(`XLEN) swbytemask(.Size(HSIZED[2:0]), .Adr(HADDRD[$clog2(`XLEN/8)-1:0]), .ByteMask(HWSTRB)); // delay write data by one cycle for flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index fe5447c4f..d472a80fd 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -279,7 +279,7 @@ module lsu ( .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM); // Compute byte masks - swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); + swbytemask #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register diff --git a/pipelined/src/lsu/swbytemask.sv b/pipelined/src/lsu/swbytemask.sv index 7f89e628a..faae408e9 100644 --- a/pipelined/src/lsu/swbytemask.sv +++ b/pipelined/src/lsu/swbytemask.sv @@ -30,13 +30,15 @@ `include "wally-config.vh" -module swbytemask ( - input logic [1:0] Size, - input logic [2:0] Adr, - output logic [`XLEN/8-1:0] ByteMask); - +module swbytemask #(parameter WORDLEN = 64)( + input logic [2:0] Size, + input logic [$clog2(WORDLEN/8)-1:0] Adr, + output logic [WORDLEN/8-1:0] ByteMask); - if(`XLEN == 64) begin + assign ByteMask = ((2**(2**Size))-1) << Adr; + +/* Equivalent to the following for WORDLEN = 64 + if(WORDLEN == 64) begin always_comb begin case(Size[1:0]) 2'b00: begin ByteMask = 8'b00000000; ByteMask[Adr[2:0]] = 1; end // sb @@ -49,18 +51,10 @@ module swbytemask ( 2'b10: if (Adr[2]) ByteMask = 8'b11110000; else ByteMask = 8'b00001111; 2'b11: ByteMask = 8'b1111_1111; - endcase - end - end else begin - always_comb begin - case(Size[1:0]) - 2'b00: begin ByteMask = 4'b0000; ByteMask[Adr[1:0]] = 1; end // sb - 2'b01: if (Adr[1]) ByteMask = 4'b1100; - else ByteMask = 4'b0011; - 2'b10: ByteMask = 4'b1111; - default: ByteMask = 4'b1111; + default ByteMask = 8'b0000_0000; endcase end end +*/ endmodule From 8147f7539952bcc3866af219c7e718f58a6bee26 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 19:40:55 -0700 Subject: [PATCH 135/138] Fixed fmaadd to work with new LZA --- pipelined/src/fpu/fma.sv | 4 ++-- pipelined/src/fpu/fmaadd.sv | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index dec492eba..950b55ff1 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -51,7 +51,7 @@ module fma( logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) - logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted + logic [3*`NF+5:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -86,7 +86,7 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv[3*`NF+5:0]), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv index 4b52208c6..53ed023f8 100644 --- a/pipelined/src/fpu/fmaadd.sv +++ b/pipelined/src/fpu/fmaadd.sv @@ -37,7 +37,7 @@ module fmaadd( input logic ZmSticky, input logic [`NE-1:0] Ze, input logic [`NE+1:0] Pe, - output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted + output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend @@ -45,7 +45,7 @@ module fmaadd( output logic [`NE+1:0] Se, output logic [3*`NF+5:0] Sm // the positive sum ); - logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum + logic [3*`NF+5:0] PreSum, NegPreSum; // possibly negitive sum /////////////////////////////////////////////////////////////////////////////// // Addition @@ -57,7 +57,7 @@ module fmaadd( assign InvA = As ^ Ps; // Choose an inverted or non-inverted addend - the one has to be added now for the LZA - assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; + assign AmInv = InvA ? ~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign PmKilled = Pm&{2*`NF+2{~KillProd}}; // Do the addition @@ -66,14 +66,14 @@ module fmaadd( // PreSum -1 = don't add 1 +1 = add 2 // NegPreSum +1 = add 2 -1 = don't add 1 // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; + assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + {InvA, AmInv} + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive - assign NegSum = PreSum[3*`NF+6]; +// assign NegSum = PreSum[3*`NF+6]; // Choose the positive sum and accompanying LZA result. - assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + assign Sm = NegSum ? NegPreSum : PreSum; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive From 57fcf0ef79f0a656c0b93989c5ba0c21229fc57c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 1 Aug 2022 21:22:48 -0500 Subject: [PATCH 136/138] Fixed fstore2 in cache? --- pipelined/src/cache/cache.sv | 4 +- pipelined/src/cache/cacheway.sv | 11 ------ pipelined/src/ebu/ahblite.sv | 2 +- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 9 +++-- pipelined/src/lsu/swbytemask.sv | 26 ++++++++----- pipelined/src/lsu/swbytemaskword.sv | 59 +++++++++++++++++++++++++++++ 7 files changed, 84 insertions(+), 29 deletions(-) create mode 100644 pipelined/src/lsu/swbytemaskword.sv diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 15791c94f..a9263bcf2 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -43,7 +43,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(WORDLEN-1)/8:0] ByteMask, input logic [WORDLEN-1:0] FinalWriteData, - input logic FStore2, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu @@ -129,7 +128,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) - CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .FStore2, + CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Invalidate(InvalidateCache)); @@ -169,7 +168,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE for(index = 0; index < 2**LOGCWPL; index++) begin assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; end - // *** have to add back in fstore2 assign LineByteMux = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes. diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 0fbe4adbc..58f03e0eb 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -39,7 +39,6 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, input logic [LINELEN-1:0] CacheWriteData, - input logic FStore2, input logic SetValidWay, input logic ClearValidWay, input logic SetDirtyWay, @@ -81,16 +80,6 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// -/* -----\/----- EXCLUDED -----\/----- - if(`LLEN>`XLEN)begin - logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); - assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0}; - end else - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); - -----/\----- EXCLUDED -----/\----- */ // If writing the whole line set all write enables to 1, else only set the correct word. assign SelectedWriteWordEn = SetValidWay | SetDirtyWay;// ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND assign FinalByteMask = SetValidWay ? '1 : LineByteMask; // OR diff --git a/pipelined/src/ebu/ahblite.sv b/pipelined/src/ebu/ahblite.sv index be7f789e4..4203f0f52 100644 --- a/pipelined/src/ebu/ahblite.sv +++ b/pipelined/src/ebu/ahblite.sv @@ -157,7 +157,7 @@ module ahblite ( assign HMASTLOCK = 0; // no locking supported assign HWRITE = (NextBusState == MEMWRITE); // Byte mask for HWSTRB - swbytemask #(`XLEN) swbytemask(.Size(HSIZED[2:0]), .Adr(HADDRD[$clog2(`XLEN/8)-1:0]), .ByteMask(HWSTRB)); + swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB)); // delay write data by one cycle for flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 4e7a427fb..b4b160289 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -226,7 +226,7 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .CacheFetchLine(ICacheFetchLine), .FStore2(), + .CacheFetchLine(ICacheFetchLine), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index d472a80fd..08d217e17 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -114,7 +114,7 @@ module lsu ( logic [`XLEN-1:0] LSUWriteDataM; logic [`XLEN-1:0] WriteDataM; logic [`LLEN-1:0] ReadDataM; - logic [(`LLEN-1)/8:0] ByteMaskM; + logic [(`LLEN-1)/8:0] ByteMaskM, FinalByteMaskM; // *** TO DO: Burst mode @@ -242,7 +242,7 @@ module lsu ( .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, .FStore2, + .ByteMask(FinalByteMaskM), .WordCount, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), @@ -279,7 +279,10 @@ module lsu ( .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM); // Compute byte masks - swbytemask #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); + swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); + // *** fix when when fstore2 is valid. I'm not sure this is even needed if LSUFunct3M can be 3'b100 for a 16 byte write. + //assign FinalByteMaskM = FStore2 ? '1 : ByteMaskM; + assign FinalByteMaskM = ByteMaskM; ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register diff --git a/pipelined/src/lsu/swbytemask.sv b/pipelined/src/lsu/swbytemask.sv index faae408e9..7f89e628a 100644 --- a/pipelined/src/lsu/swbytemask.sv +++ b/pipelined/src/lsu/swbytemask.sv @@ -30,15 +30,13 @@ `include "wally-config.vh" -module swbytemask #(parameter WORDLEN = 64)( - input logic [2:0] Size, - input logic [$clog2(WORDLEN/8)-1:0] Adr, - output logic [WORDLEN/8-1:0] ByteMask); +module swbytemask ( + input logic [1:0] Size, + input logic [2:0] Adr, + output logic [`XLEN/8-1:0] ByteMask); + - assign ByteMask = ((2**(2**Size))-1) << Adr; - -/* Equivalent to the following for WORDLEN = 64 - if(WORDLEN == 64) begin + if(`XLEN == 64) begin always_comb begin case(Size[1:0]) 2'b00: begin ByteMask = 8'b00000000; ByteMask[Adr[2:0]] = 1; end // sb @@ -51,10 +49,18 @@ module swbytemask #(parameter WORDLEN = 64)( 2'b10: if (Adr[2]) ByteMask = 8'b11110000; else ByteMask = 8'b00001111; 2'b11: ByteMask = 8'b1111_1111; - default ByteMask = 8'b0000_0000; + endcase + end + end else begin + always_comb begin + case(Size[1:0]) + 2'b00: begin ByteMask = 4'b0000; ByteMask[Adr[1:0]] = 1; end // sb + 2'b01: if (Adr[1]) ByteMask = 4'b1100; + else ByteMask = 4'b0011; + 2'b10: ByteMask = 4'b1111; + default: ByteMask = 4'b1111; endcase end end -*/ endmodule diff --git a/pipelined/src/lsu/swbytemaskword.sv b/pipelined/src/lsu/swbytemaskword.sv new file mode 100644 index 000000000..4569355fb --- /dev/null +++ b/pipelined/src/lsu/swbytemaskword.sv @@ -0,0 +1,59 @@ +/////////////////////////////////////////// +// swbytemask.sv +// +// Written: David_Harris@hmc.edu 9 January 2021 +// Modified: +// +// Purpose: On-chip RAM, external to core +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR CO/////////////////////////////////////////// +// swbytemask.sv +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module swbytemaskword #(parameter WORDLEN = 64)( + input logic [2:0] Size, + input logic [$clog2(WORDLEN/8)-1:0] Adr, + output logic [WORDLEN/8-1:0] ByteMask); + + assign ByteMask = ((2**(2**Size))-1) << Adr; + +/* Equivalent to the following for WORDLEN = 64 + if(WORDLEN == 64) begin + always_comb begin + case(Size[1:0]) + 2'b00: begin ByteMask = 8'b00000000; ByteMask[Adr[2:0]] = 1; end // sb + 2'b01: case (Adr[2:1]) + 2'b00: ByteMask = 8'b0000_0011; + 2'b01: ByteMask = 8'b0000_1100; + 2'b10: ByteMask = 8'b0011_0000; + 2'b11: ByteMask = 8'b1100_0000; + endcase + 2'b10: if (Adr[2]) ByteMask = 8'b11110000; + else ByteMask = 8'b00001111; + 2'b11: ByteMask = 8'b1111_1111; + default ByteMask = 8'b0000_0000; + endcase + end + end +*/ + +endmodule + From 887e4c73fbfbf1d20e1f8c11cb9bbd57c1e9eaa1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 2 Aug 2022 07:34:09 -0700 Subject: [PATCH 137/138] Moved InvA to sign block; simplified fmaexpadd coding --- pipelined/src/fpu/fma.sv | 2 +- pipelined/src/fpu/fmaadd.sv | 7 +------ pipelined/src/fpu/fmaexpadd.sv | 7 +++++-- pipelined/src/fpu/fmasign.sv | 8 +++++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index de1fdabf6..d12f497eb 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -72,7 +72,7 @@ module fma( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// // calculate the signs and take the opperation into account - fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA); fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ZmSticky, .KillProd); diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv index 53ed023f8..653b9b3e2 100644 --- a/pipelined/src/fpu/fmaadd.sv +++ b/pipelined/src/fpu/fmaadd.sv @@ -33,6 +33,7 @@ module fmaadd( input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) input logic [2*`NF+1:0] Pm, // the product's mantissa input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) + input logic InvA, // invert the aligned addend input logic KillProd, // should the product be set to 0 input logic ZmSticky, input logic [`NE-1:0] Ze, @@ -40,7 +41,6 @@ module fmaadd( output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive - output logic InvA, // do you invert the aligned addend output logic Ss, output logic [`NE+1:0] Se, output logic [3*`NF+5:0] Sm // the positive sum @@ -51,11 +51,6 @@ module fmaadd( // Addition /////////////////////////////////////////////////////////////////////////////// - // Negate Z when doing one of the following opperations: - // -prod + Z - // prod - Z - assign InvA = As ^ Ps; - // Choose an inverted or non-inverted addend - the one has to be added now for the LZA assign AmInv = InvA ? ~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) diff --git a/pipelined/src/fpu/fmaexpadd.sv b/pipelined/src/fpu/fmaexpadd.sv index 1d208327b..d39dfadde 100644 --- a/pipelined/src/fpu/fmaexpadd.sv +++ b/pipelined/src/fpu/fmaexpadd.sv @@ -36,7 +36,10 @@ module fmaexpadd( output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 ); + logic PZero; + // kill the exponent if the product is zero - either X or Y is 0 - assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}}; + assign PZero = XZero | YZero; + assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)}); -endmodule \ No newline at end of file +endmodule diff --git a/pipelined/src/fpu/fmasign.sv b/pipelined/src/fpu/fmasign.sv index 66c1af83a..936eea211 100644 --- a/pipelined/src/fpu/fmasign.sv +++ b/pipelined/src/fpu/fmasign.sv @@ -33,7 +33,8 @@ module fmasign( input logic [2:0] OpCtrl, // opperation contol input logic Xs, Ys, Zs, // sign of the inputs output logic Ps, // the product's sign - takes opperation into account - output logic As // aligned addend sign used in fma - takes opperation into account + output logic As, // aligned addend sign used in fma - takes opperation into account + output logic InvA // Effective subtraction: invert addend ); // Calculate the product's sign @@ -41,7 +42,8 @@ module fmasign( // flip is negation opperation assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); - // flip if subtraction + // flip addend sign for subtraction assign As = Zs^OpCtrl[0]; - + // Effective subtraction when product and addend have opposite signs + assign InvA = As ^ Ps; endmodule From e70b28f7f60519a3a58b9b5b6c06242ca169798f Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 2 Aug 2022 07:42:32 -0700 Subject: [PATCH 138/138] FMA cleanup --- pipelined/src/fpu/fmaadd.sv | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv index 653b9b3e2..af7b15bf4 100644 --- a/pipelined/src/fpu/fmaadd.sv +++ b/pipelined/src/fpu/fmaadd.sv @@ -51,10 +51,10 @@ module fmaadd( // Addition /////////////////////////////////////////////////////////////////////////////// - // Choose an inverted or non-inverted addend - the one has to be added now for the LZA + // Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition assign AmInv = InvA ? ~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = Pm&{2*`NF+2{~KillProd}}; + assign PmKilled = KillProd ? '0 : Pm; // Do the addition // - calculate a positive and negitive sum in parallel // Zsticky Psticky @@ -64,15 +64,12 @@ module fmaadd( assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + {InvA, AmInv} + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; - // Is the sum negitive -// assign NegSum = PreSum[3*`NF+6]; - // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum : PreSum; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive // if -p - z then the Sum is negitive - assign Ss = NegSum^Ps; //*** move to execute stage + assign Ss = NegSum^Ps; assign Se = KillProd ? {2'b0, Ze} : Pe; endmodule