diff --git a/wally-pipelined/config/buildroot/wally-config.vh b/wally-pipelined/config/buildroot/wally-config.vh index 3a834ab0..fb15098e 100644 --- a/wally-pipelined/config/buildroot/wally-config.vh +++ b/wally-pipelined/config/buildroot/wally-config.vh @@ -43,15 +43,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 // Domenico Ottolia 4/15: Support for vectored interrupts in _tvec csrs. Just implemented in src/privileged/trap.sv around line 75. Pretty sure this should be 1. +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index d3822863..59daaf36 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -44,15 +44,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 // Domenico Ottolia 4/15: Support for vectored interrupts in _tvec csrs. Just implemented in src/privileged/trap.sv around line 75. Pretty sure this should be 1. +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/coremark-64i/wally-config.vh b/wally-pipelined/config/coremark-64i/wally-config.vh deleted file mode 100644 index ae3100c6..00000000 --- a/wally-pipelined/config/coremark-64i/wally-config.vh +++ /dev/null @@ -1,84 +0,0 @@ -////////////////////////////////////////// -// wally-config.vh -// -// Written: David_Harris@hmc.edu 4 January 2021 -// Modified: -// -// Purpose: Specify which features are configured -// Macros to determine which modes are supported based on MISA -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -// include shared configuration -`include "wally-shared.vh" - -// RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 - -//`define MISA (32'h00000104) -`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20) -`define ZCSR_SUPPORTED 1 -`define ZCOUNTERS_SUPPORTED 1 - -// Microarchitectural Features -`define UARCH_PIPELINED 1 -`define UARCH_SUPERSCALR 0 -`define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 -`define MEM_DTIM 1 -`define MEM_ICACHE 0 -`define MEM_VIRTMEM 0 - -// Address space -`define RESET_VECTOR 64'h0000000000001000 - -// Bus Interface width -`define AHBW 64 - -// Peripheral Addresses -// Peripheral memory space extends from BASE to BASE+RANGE -// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits - -`define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 56'h00001000 -`define BOOTTIM_RANGE 56'h00000FFF -`define TIM_SUPPORTED 1'b1 -`define TIM_BASE 56'h80000000 -`define TIM_RANGE 56'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 56'h02000000 -`define CLINT_RANGE 56'h0000FFFF -`define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 56'h10012000 -`define GPIO_RANGE 56'h000000FF -`define UART_SUPPORTED 1'b1 -`define UART_BASE 56'h10000000 -`define UART_RANGE 56'h00000007 -`define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 56'h0C000000 -`define PLIC_RANGE 56'h03FFFFFF - -// Test modes - -// Tie GPIO outputs back to inputs -`define GPIO_LOOPBACK_TEST 0 - - -// Hardware configuration -`define UART_PRESCALE 1 - diff --git a/wally-pipelined/config/coremark/wally-config.vh b/wally-pipelined/config/coremark/wally-config.vh index e4e3376d..8fa6f8c9 100644 --- a/wally-pipelined/config/coremark/wally-config.vh +++ b/wally-pipelined/config/coremark/wally-config.vh @@ -43,15 +43,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Address space `define RESET_VECTOR 64'h00000000000100b0 diff --git a/wally-pipelined/config/coremark_bare/wally-config.vh b/wally-pipelined/config/coremark_bare/wally-config.vh index 45f04cff..4389423b 100644 --- a/wally-pipelined/config/coremark_bare/wally-config.vh +++ b/wally-pipelined/config/coremark_bare/wally-config.vh @@ -44,15 +44,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index 8b63c56a..1355b5c9 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -42,15 +42,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv32icfd/wally-config.vh b/wally-pipelined/config/rv32icfd/wally-config.vh index 28fc5a50..e27f40d0 100644 --- a/wally-pipelined/config/rv32icfd/wally-config.vh +++ b/wally-pipelined/config/rv32icfd/wally-config.vh @@ -42,15 +42,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index 01680b9d..e4cef08c 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -44,15 +44,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Address space `define RESET_VECTOR 64'h0000000000000000 diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 44a90e1c..88d3fd03 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -43,15 +43,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 @@ -73,7 +84,7 @@ `define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 -`define TIM_RANGE 56'h07FFFFFF +`define TIM_RANGE 56'h7FFFFFFF `define CLINT_SUPPORTED 1'b1 `define CLINT_BASE 56'h02000000 `define CLINT_RANGE 56'h0000FFFF diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh index ac7f4ec6..ec889d64 100644 --- a/wally-pipelined/config/rv64icfd/wally-config.vh +++ b/wally-pipelined/config/rv64icfd/wally-config.vh @@ -43,15 +43,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv64imc/wally-config.vh b/wally-pipelined/config/rv64imc/wally-config.vh index 925e9865..12c220b6 100644 --- a/wally-pipelined/config/rv64imc/wally-config.vh +++ b/wally-pipelined/config/rv64imc/wally-config.vh @@ -42,15 +42,26 @@ `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 -`define MEM_DCACHE 0 +`define MEM_DCACHE 1 `define MEM_DTIM 1 -`define MEM_ICACHE 0 +`define MEM_ICACHE 1 `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 +// TLB configuration. Entries should be a power of 2 `define ITLB_ENTRIES 32 `define DTLB_ENTRIES 32 +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 2048 +`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_REPLBITS 3 +`define ICACHE_NUMWAYS 1 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_BLOCKLENINBITS 256 + // Address space `define RESET_VECTOR 64'h0000000080000000 diff --git a/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh b/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh index 2f1382bb..8807bcdd 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh +++ b/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh @@ -14,7 +14,7 @@ outDir="../linux-testvectors" # Uncomment this version for QEMU debugging of kernel # - good for poking around VM if it boots up # - good for running QEMU commands (press "Ctrl-A" then "c" to open QEMU command prompt) -$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio +#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio # Uncomment this version for GDB debugging of kernel # - attempts to load in symbols from "vmlinux" # - good for looking at backtraces when Linux gets stuck for some reason @@ -30,9 +30,9 @@ $customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Ima # - Makes qemu_in_gdb_format.txt # - Splits qemu_in_gdb_format.txt into chunks of 100,000 instrs #cat $intermedDir/qemu_output.txt | ./parse_qemu.py >$intermedDir/qemu_in_gdb_format.txt -#cd $intermedDir -#split -d -l 5600000 ./qemu_in_gdb_format.txt --verbose -#cd ../../testvector-generation +cd $intermedDir +split -d -l 5000000 ./qemu_in_gdb_format.txt --verbose +cd ../../testvector-generation # Uncomment this version for parse_gdb_output.py debugging # - Uses qemu_in_gdb_format.txt diff --git a/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug b/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug index 0bc9e9b8..2cd32aad 100644 --- a/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug +++ b/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug @@ -6,3 +6,5 @@ c file ../buildroot-image-output/vmlinux b plic_init c +b do_idle +c diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py index 234c869a..2cd2d77e 100755 --- a/wally-pipelined/regression/regression-wally.py +++ b/wally-pipelined/regression/regression-wally.py @@ -23,11 +23,11 @@ TestCase = namedtuple("TestCase", ['name', 'cmd', 'grepstr']) # edit this list to add more test cases configs = [ - TestCase( - name="busybear", - cmd="vsim -do wally-busybear-batch.do -c > {}", - grepstr="loaded 100000 instructions" - ), + #TestCase( + # name="busybear", + # cmd="vsim -do wally-busybear-batch.do -c > {}", + # grepstr="loaded 100000 instructions" + #), TestCase( name="buildroot", cmd="vsim -do wally-buildroot-batch.do -c > {}", diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 81d86189..3b5a649f 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -46,6 +46,8 @@ module dcache output logic [`XLEN-1:0] ReadDataM, output logic DCacheStall, output logic CommittedM, + output logic DCacheMiss, + output logic DCacheAccess, // inputs from TLB and PMA/P input logic ExceptionM, @@ -53,7 +55,7 @@ module dcache input logic DTLBMissM, input logic CacheableM, input logic DTLBWriteM, - input logic ITLBWriteF, + input logic ITLBWriteF, // from ptw input logic SelPTW, input logic WalkerPageFaultM, @@ -66,10 +68,14 @@ module dcache output logic [`XLEN-1:0] HWDATA // to ahb ); - localparam integer BLOCKLEN = 256; +/* localparam integer BLOCKLEN = 256; localparam integer NUMLINES = 64; localparam integer NUMWAYS = 4; - localparam integer NUMREPL_BITS = 3; + localparam integer NUMREPL_BITS = 3;*/ + localparam integer BLOCKLEN = `DCACHE_BLOCKLENINBITS; + localparam integer NUMLINES = `DCACHE_WAYSIZEINBYTES*8/BLOCKLEN; + localparam integer NUMWAYS = `DCACHE_NUMWAYS; + localparam integer NUMREPL_BITS = `DCACHE_REPLBITS; localparam integer BLOCKBYTELEN = BLOCKLEN/8; localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); @@ -416,7 +422,7 @@ module dcache if (reset) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; - + // next state logic and some state ouputs. always_comb begin DCacheStall = 1'b0; @@ -437,6 +443,8 @@ module dcache CommittedM = 1'b0; SelUncached = 1'b0; SelEvict = 1'b0; + DCacheAccess = 1'b0; + DCacheMiss = 1'b0; case (CurrState) STATE_READY: begin @@ -472,7 +480,8 @@ module dcache // read hit valid cached else if(MemRWM[1] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin DCacheStall = 1'b0; - + DCacheAccess = 1'b1; + if(StallW) begin NextState = STATE_CPU_BUSY; SelAdrM = 1'b1; @@ -485,6 +494,7 @@ module dcache DCacheStall = 1'b0; SRAMWordWriteEnableM = 1'b1; SetDirtyM = 1'b1; + DCacheStall = 1'b1; if(StallW) begin NextState = STATE_CPU_BUSY; @@ -497,6 +507,8 @@ module dcache NextState = STATE_MISS_FETCH_WDV; CntReset = 1'b1; DCacheStall = 1'b1; + DCacheAccess = 1'b1; + DCacheMiss = 1'b1; end // uncached write else if(MemRWM[0] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index 943ab1b8..99c43df5 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -53,9 +53,8 @@ module icache ); // Configuration parameters - // TODO Move these to a config file - localparam integer BLOCKLEN = 256; - localparam integer NUMLINES = 512; + localparam integer BLOCKLEN = `ICACHE_BLOCKLENINBITS; + localparam integer NUMLINES = `ICACHE_WAYSIZEINBYTES*8/`ICACHE_BLOCKLENINBITS; // Input signals to cache memory logic FlushMem; diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 9ef187df..9c53194d 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -89,15 +89,15 @@ module fma1( input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtE, // precision 1 = double 0 = single output logic [2*`NF+1:0] ProdManE, // 1.X frac * 1.Y frac in U(2.2Nf) format - output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in *** format + output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1) output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign output logic AddendStickyE, // sticky bit that is calculated during alignment output logic KillProdE // set the product to zero before addition if the product is too small to matter ); logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits? - logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bit - logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter + logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1) /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -132,7 +132,7 @@ module fma1( // |1'b0| addnend | // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {55'b0, {ZAssumed1E, ZFracE}, 106'b0}; + assign ZManPreShifted = {(`NF+3)'(0), {ZAssumed1E, ZFracE}, /*106*/(2*`NF+2)'(0)}; always_comb begin @@ -140,7 +140,7 @@ module fma1( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - if ($signed(AlignCnt) <= $signed(-13'd56)) begin + if ($signed(AlignCnt) <= /*$signed(-13'd56)*/-(`NF+4)) begin KillProdE = 1; ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0}; AddendStickyE = ~(XZeroE|YZeroE); @@ -149,7 +149,7 @@ module fma1( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - end else if($signed(AlignCnt) <= $signed(13'd0)) begin + end else if($signed(AlignCnt) <= 0) begin KillProdE = 0; ZManShifted = ZManPreShifted << -AlignCnt; AddendStickyE = |(ZManShifted[51:0]); @@ -158,7 +158,7 @@ module fma1( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - end else if ($signed(AlignCnt)<=$signed(13'd106)) begin + end else if ($signed(AlignCnt)<=(2*`NF+2)) begin KillProdE = 0; ZManShifted = ZManPreShifted >> AlignCnt; AddendStickyE = |(ZManShifted[51:0]); @@ -176,7 +176,7 @@ module fma1( end end - assign AlignedAddendE = ZManShifted[213:52]; + assign AlignedAddendE = ZManShifted[(4*`NF+5):`NF]; endmodule diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 2d1351ec..f283f5e4 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -25,23 +25,23 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic [31:0] InstrD, - input logic [`XLEN-1:0] ReadDataW, // Read data from memory - input logic [`XLEN-1:0] SrcAE, // Integer input being processed - input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg - input logic StallE, StallM, StallW, - input logic FlushE, FlushM, FlushW, - input logic [4:0] RdE, RdM, RdW, - output logic FRegWriteM, - output logic FStallD, // Stall the decode stage - output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory - output logic [`XLEN-1:0] FIntResM, - output logic FDivBusyE, // Is the divison/sqrt unit busy - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode from CSR + input logic [31:0] InstrD, + input logic [`XLEN-1:0] ReadDataW, // Read data from memory + input logic [`XLEN-1:0] SrcAE, // Integer input being processed + input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg + input logic StallE, StallM, StallW, + input logic FlushE, FlushM, FlushW, + input logic [4:0] RdE, RdM, RdW, + output logic FRegWriteM, + output logic FStallD, // Stall the decode stage + output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`XLEN-1:0] FIntResM, + output logic FDivBusyE, // Is the divison/sqrt unit busy + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS // *** folder at same level of src for tests fpu tests @@ -50,254 +50,256 @@ module fpu ( generate if (`F_SUPPORTED | `D_SUPPORTED) begin // control logic signal instantiation - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division - logic FWriteIntD; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; - logic [4:0] Adr1E, Adr2E, Adr3E; - - // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`XLEN-1:0] FSrcXMAligned; - logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding) - - // unpacking signals - logic XSgnE, YSgnE, ZSgnE; - logic [10:0] XExpE, YExpE, ZExpE; - logic [51:0] XFracE, YFracE, ZFracE; - logic XAssumed1E, YAssumed1E, ZAssumed1E; - logic XNaNE, YNaNE, ZNaNE; - logic XSNaNE, YSNaNE, ZSNaNE; - logic XDenormE, YDenormE, ZDenormE; - logic XZeroE, YZeroE, ZZeroE; - logic [10:0] BiasE; - logic XInfE, YInfE, ZInfE; - logic XExpMaxE; - logic XNormE; - - logic XSgnM, YSgnM, ZSgnM; - logic [10:0] XExpM, YExpM, ZExpM; - logic [51:0] XFracM, YFracM, ZFracM; - logic XNaNM, YNaNM, ZNaNM; - logic XSNaNM, YSNaNM, ZSNaNM; - logic XZeroM, YZeroM, ZZeroM; - logic XInfM, YInfM, ZInfM; - - // div/sqrt signals - logic [63:0] FDivResultM, FDivResultW; - logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; - logic FDivSqrtDoneE; - logic [63:0] DivInput1E, DivInput2E; - logic HoldInputs; // keep forwarded inputs arround durring division - - //fpu signals - logic [63:0] FMAResM, FMAResW; - logic [4:0] FMAFlgM, FMAFlgW; - - - logic [63:0] ReadResW; - - // add/cvt signals - logic [63:0] FAddResM, FAddResW; - logic [4:0] FAddFlgM, FAddFlgW; - logic [63:0] CvtResE, CvtResM; - logic [4:0] CvtFlgE, CvtFlgM; - - // cmp signals - logic CmpNVE, CmpNVM, CmpNVW; - logic [63:0] CmpResE, CmpResM, CmpResW; - - // fsgn signals - logic [63:0] SgnResE, SgnResM; - logic SgnNVE, SgnNVM, SgnNVW; - logic [63:0] FResM, FResW; - logic [4:0] FFlgM, FFlgW; - - // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM; - - // classify signals - logic [63:0] ClassResE, ClassResM; - - // 64-bit FPU result - logic [63:0] FPUResultW; - logic [4:0] FPUFlagsW; - - - //DECODE STAGE - - // top-level controller for FPU - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), - .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // regfile instantiation - fregfile fregfile (clk, reset, FRegWriteW, - InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResultW, - FRD1D, FRD2D, FRD3D); - - //***************** - // D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); - - - //EXECUTION STAGE - - // Hazard unit for FPU - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, + logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD; // Write to integer register + logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal + logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [4:0] Adr1E, Adr2E, Adr3E; + + // regfile signals + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`XLEN-1:0] FSrcXMAligned; + logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding) + + // unpacking signals + logic XSgnE, YSgnE, ZSgnE; + logic [10:0] XExpE, YExpE, ZExpE; + logic [51:0] XFracE, YFracE, ZFracE; + logic XAssumed1E, YAssumed1E, ZAssumed1E; + logic XNaNE, YNaNE, ZNaNE; + logic XSNaNE, YSNaNE, ZSNaNE; + logic XDenormE, YDenormE, ZDenormE; + logic XZeroE, YZeroE, ZZeroE; + logic [10:0] BiasE; + logic XInfE, YInfE, ZInfE; + logic XExpMaxE; + logic XNormE; + + logic XSgnM, YSgnM, ZSgnM; + logic [10:0] XExpM, YExpM, ZExpM; + logic [51:0] XFracM, YFracM, ZFracM; + logic XNaNM, YNaNM, ZNaNM; + logic XSNaNM, YSNaNM, ZSNaNM; + logic XZeroM, YZeroM, ZZeroM; + logic XInfM, YInfM, ZInfM; + + // div/sqrt signals + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; + logic [63:0] DivInput1E, DivInput2E; + logic HoldInputs; // keep forwarded inputs arround durring division + + //fpu signals + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; + + logic [63:0] ReadResW; + + // add/cvt signals + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; + logic [63:0] CvtResE, CvtResM; + logic [4:0] CvtFlgE, CvtFlgM; + + // cmp signals + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; + + // fsgn signals + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; + logic [63:0] FResM, FResW; + logic [4:0] FFlgM, FFlgW; + + // instantiation of W stage regfile signals + logic [63:0] AlignedSrcAM; + + // classify signals + logic [63:0] ClassResE, ClassResM; + + // 64-bit FPU result + logic [63:0] FPUResultW; + logic [4:0] FPUFlagsW; + + //DECODE STAGE + + // top-level controller for FPU + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); + + // regfile instantiation + fregfile fregfile (clk, reset, FRegWriteW, + InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, + FPUResultW, + FRD1D, FRD2D, FRD3D); + + //***************** + // D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); + + //EXECUTION STAGE + + // Hazard unit for FPU + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); - mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); - mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE); - - unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); + mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); + mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE); + + unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), + .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, + .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, + .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, + .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); // first of two-stage instance of floating-point fused multiply-add unit - fma fma (.clk, .reset, .FlushM, .StallM, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, - .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, - .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), - .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); - - // first and only instance of floating-point divider - logic fpdivClk; - - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(fpdivClk)); - - // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(HoldInputs)); - flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(HoldInputs)); - //*** add round to nearest ties to max magnitude - fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), .P(~FmtE), .FDivBusyE, .HoldInputs, - .OvEn(1'b1), .UnEn(1'b1), .start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); + fma fma (.clk, .reset, .FlushM, .StallM, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, . + ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, + .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, + .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, + .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, + .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), + .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); + + // first and only instance of floating-point divider + logic fpdivClk; + + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(fpdivClk)); + + // capture the inputs for div/sqrt + flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(HoldInputs)); + flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(HoldInputs)); + //*** add round to nearest ties to max magnitude + fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + .P(~FmtE), .FDivBusyE, .HoldInputs, + .OvEn(1'b1), .UnEn(1'b1), + .start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); + // .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, // .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, // .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - // assign FDivBusyE = 0; - // first of two-stage instance of floating-point add/cvt unit - faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, - .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); - - // first and only instance of floating-point comparator - fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); - - // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); - - // first and only instance of floating-point classify unit - fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); - - - fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); - - // output for store instructions - assign FWriteDataE = FSrcYE[`XLEN-1:0]; - - //***************** - // E/M pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); - // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); - flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM}); - flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM}); - flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM}); - flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, - {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, - {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - - - - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); - flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - - flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); - flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); - flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); - - flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); - - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - //BEGIN MEMORY STAGE - mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); - mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); - - // mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned); - mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); - mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); - - //***************** - // M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - - flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); - - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); - - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - - flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, - {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); - - //######################################### - // BEGIN WRITEBACK STAGE - //######################################### - - mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); - mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); - - - end else begin // no F_SUPPORTED; tie outputs low - assign FStallD = 0; - assign FWriteIntE = 0; - assign FWriteIntM = 0; - assign FWriteIntW = 0; - assign FWriteDataE = 0; - assign FIntResM = 0; - assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; - assign SetFflagsM = 0; - end + // assign FDivBusyE = 0; + + // first of two-stage instance of floating-point add/cvt unit + faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, + .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); + + // first and only instance of floating-point comparator + fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, + .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, + .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); + + // first and only instance of floating-point sign converter + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); + + // first and only instance of floating-point classify unit + fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); + + fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); + + // output for store instructions + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + + //***************** + // E/M pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); + // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); + // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); + flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM}); + flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM}); + flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM}); + flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, + {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); + + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); + + flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); + flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); + + flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, + {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); + + //BEGIN MEMORY STAGE + mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); + mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); + + // mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned); + mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); + mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); + + //***************** + // M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, + {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); + + //######################################### + // BEGIN WRITEBACK STAGE + //######################################### + mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); + mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); + + + end else begin // no F_SUPPORTED; tie outputs low + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; + end endgenerate - + endmodule // fpu diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv index 0e15f553..8991fb71 100755 --- a/wally-pipelined/src/fpu/fsm.sv +++ b/wally-pipelined/src/fpu/fsm.sv @@ -6,7 +6,7 @@ module fsm (done, load_rega, load_regb, load_regc, input clk; input reset; input start; -// input error; + // input error; input op_type; //***can use divbusy insted of holdinputs output done; @@ -113,8 +113,8 @@ module fsm (done, load_rega, load_regb, load_regc, S1: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -129,8 +129,8 @@ module fsm (done, load_rega, load_regb, load_regc, S2: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -145,8 +145,8 @@ module fsm (done, load_rega, load_regb, load_regc, S3: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -161,8 +161,8 @@ module fsm (done, load_rega, load_regb, load_regc, S4: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -177,8 +177,8 @@ module fsm (done, load_rega, load_regb, load_regc, S5: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -193,8 +193,8 @@ module fsm (done, load_rega, load_regb, load_regc, S6: // iteration 3 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -209,8 +209,8 @@ module fsm (done, load_rega, load_regb, load_regc, S7: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -225,8 +225,8 @@ module fsm (done, load_rega, load_regb, load_regc, S8: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -241,8 +241,8 @@ module fsm (done, load_rega, load_regb, load_regc, S9: // rem begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -257,8 +257,8 @@ module fsm (done, load_rega, load_regb, load_regc, S10: // done begin done = 1'b1; - divBusy = 1'b0; - holdInputs = 1'b0; + divBusy = 1'b0; + holdInputs = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -273,8 +273,8 @@ module fsm (done, load_rega, load_regb, load_regc, S13: // start of sqrt path begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -289,8 +289,8 @@ module fsm (done, load_rega, load_regb, load_regc, S14: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -305,8 +305,8 @@ module fsm (done, load_rega, load_regb, load_regc, S15: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -321,8 +321,8 @@ module fsm (done, load_rega, load_regb, load_regc, S16: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -337,8 +337,8 @@ module fsm (done, load_rega, load_regb, load_regc, S17: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -353,8 +353,8 @@ module fsm (done, load_rega, load_regb, load_regc, S18: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -369,8 +369,8 @@ module fsm (done, load_rega, load_regb, load_regc, S19: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -385,8 +385,8 @@ module fsm (done, load_rega, load_regb, load_regc, S20: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -401,8 +401,8 @@ module fsm (done, load_rega, load_regb, load_regc, S21: // iteration 3 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -417,8 +417,8 @@ module fsm (done, load_rega, load_regb, load_regc, S22: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -433,8 +433,8 @@ module fsm (done, load_rega, load_regb, load_regc, S23: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -449,8 +449,8 @@ module fsm (done, load_rega, load_regb, load_regc, S24: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -465,8 +465,8 @@ module fsm (done, load_rega, load_regb, load_regc, S25: // rem begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -476,13 +476,13 @@ module fsm (done, load_rega, load_regb, load_regc, sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - NEXT_STATE = S26; - end + NEXT_STATE = S27; + end S26: // done begin done = 1'b1; - divBusy = 1'b0; - holdInputs = 1'b0; + divBusy = 1'b0; + holdInputs = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -497,8 +497,8 @@ module fsm (done, load_rega, load_regb, load_regc, default: begin done = 1'b0; - divBusy = 1'b0; - holdInputs = 1'b0; + divBusy = 1'b0; + holdInputs = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index a3b73d00..71f9c001 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -45,6 +45,8 @@ module lsu output logic CommittedM, output logic SquashSCW, output logic DataMisalignedM, + output logic DCacheMiss, + output logic DCacheAccess, // address and write data input logic [`XLEN-1:0] MemAdrM, @@ -315,6 +317,8 @@ module lsu .ReadDataM(HPTWReadPTE), .DCacheStall(DCacheStall), .CommittedM(CommittedMfromDCache), + .DCacheMiss, + .DCacheAccess, .ExceptionM(ExceptionM), .PendingInterruptM(PendingInterruptMtoDCache), .DTLBMissM(DTLBMissM), diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 5d2174f4..0fe2b2d7 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -34,9 +34,10 @@ module pmpadrdec ( input logic [7:0] PMPCfg, input logic [`XLEN-1:0] PMPAdr, input logic PAgePMPAdrIn, - input logic NoLowerMatchIn, +// input logic NoLowerMatchIn, + input logic FirstMatch, output logic PAgePMPAdrOut, - output logic NoLowerMatchOut, +// output logic NoLowerMatchOut, output logic Match, Active, output logic L, X, W, R ); @@ -47,7 +48,7 @@ module pmpadrdec ( logic TORMatch, NAMatch; logic PAltPMPAdr; - logic FirstMatch; +// logic FirstMatch; logic [`PA_BITS-1:0] CurrentAdrFull; logic [1:0] AdrMode; @@ -69,16 +70,30 @@ module pmpadrdec ( // verilator lint_off UNOPTFLAT logic [`PA_BITS-1:0] Mask; - genvar i; + //genvar i; // create a mask of which bits to ignore - generate - assign Mask[1:0] = 2'b11; - assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region - for (i=3; i < `PA_BITS; i=i+1) begin:mask - assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore - end - endgenerate + // generate + // assign Mask[1:0] = 2'b11; + // assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region + // for (i=3; i < `PA_BITS; i=i+1) begin:mask + // assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore + // end + // endgenerate + prioritycircuit #(.ENTRIES(`PA_BITS-2), .FINAL_OP("NONE")) maskgen(.a(~PMPAdr[`PA_BITS-3:0]), .FirstPin(AdrMode==NAPOT), .y(Mask[`PA_BITS-1:2])); + assign Mask[1:0] = 2'b11; + + // *** possible experiments: + /* PA < PMP addr could be in its own module, + preeserving hierarchy so we can know if this is the culprit on the critical path + Should take logarthmic time, so more like 6 levels than 40 should be expected + + update mask generation + Should be concurrent with the subtraction/comparison + if one is the critical path, the other shouldn't be which makes us think the mask generation is the culprit. + + Hopefully just use the priority circuit here + */ // verilator lint_on UNOPTFLAT assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask); @@ -87,8 +102,6 @@ module pmpadrdec ( (AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch : 0; - assign FirstMatch = NoLowerMatchIn & Match; - assign NoLowerMatchOut = NoLowerMatchIn & ~Match; assign L = PMPCfg[7] & FirstMatch; assign X = PMPCfg[2] & FirstMatch; assign W = PMPCfg[1] & FirstMatch; diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index 9c7f11da..eac4cc47 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -55,12 +55,9 @@ module pmpchecker ( // Bit i is high when the address falls in PMP region i logic EnforcePMP; logic [7:0] PMPCfg[`PMP_ENTRIES-1:0]; - logic [`PMP_ENTRIES-1:0] Match; // PMP Entry matches + logic [`PMP_ENTRIES-1:0] Match, FirstMatch; // PMP Entry matches logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set - // verilator lint_off UNOPTFLAT - logic [`PMP_ENTRIES-1:0] NoLowerMatch; // None of the lower PMP entries match - // verilator lint_on UNOPTFLAT logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] genvar i,j; @@ -70,9 +67,9 @@ module pmpchecker ( .PMPAdr(PMPADDR_ARRAY_REGW), .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), .PAgePMPAdrOut(PAgePMPAdr), - .NoLowerMatchIn({NoLowerMatch[`PMP_ENTRIES-2:0], 1'b1}), - .NoLowerMatchOut(NoLowerMatch), - .Match, .Active, .L, .X, .W, .R); + .FirstMatch, .Match, .Active, .L, .X, .W, .R); + + prioritycircuit #(.ENTRIES(`PMP_ENTRIES), .FINAL_OP("AND")) pmppriority(.a(Match), .FirstPin(1'b1), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit. // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; diff --git a/wally-pipelined/src/mmu/tlbpriority.sv b/wally-pipelined/src/mmu/prioritycircuit.sv similarity index 81% rename from wally-pipelined/src/mmu/tlbpriority.sv rename to wally-pipelined/src/mmu/prioritycircuit.sv index 5096cae6..df44b35f 100644 --- a/wally-pipelined/src/mmu/tlbpriority.sv +++ b/wally-pipelined/src/mmu/prioritycircuit.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// tlbpriority.sv +// prioritycircuit.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021 // Modified: Teo Ene 15 Apr 2021: @@ -30,8 +30,10 @@ `include "wally-config.vh" -module tlbpriority #(parameter ENTRIES = 8) ( +module prioritycircuit #(parameter ENTRIES = 8, + parameter FINAL_OP = "AND") ( input logic [ENTRIES-1:0] a, + input logic FirstPin, output logic [ENTRIES-1:0] y ); // verilator lint_off UNOPTFLAT @@ -40,11 +42,19 @@ module tlbpriority #(parameter ENTRIES = 8) ( // generate thermometer code mask genvar i; generate - assign nolower[0] = 1; + assign nolower[0] = FirstPin; for (i=1; i= 128 || `MEM_DCACHE == 0) else $error("DCACHE_BLOCKLENINBITS must be at least 128 when caches are enabled"); + assert (`DCACHE_BLOCKLENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_BLOCKLENINBITS must be smaller than way size"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 || `MEM_ICACHE == 0 || `MEM_VIRTMEM == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_BLOCKLENINBITS >= 32 || `MEM_ICACHE == 0) else $error("ICACHE_BLOCKLENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_BLOCKLENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_BLOCKLENINBITS must be smaller than way size"); + assert (2**$clog2(`DCACHE_BLOCKLENINBITS) == `DCACHE_BLOCKLENINBITS) else $error("DCACHE_BLOCKLENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_BLOCKLENINBITS) == `ICACHE_BLOCKLENINBITS) else $error("ICACHE_BLOCKLENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (`ICACHE_NUMWAYS == 1 || `MEM_ICACHE == 0) else $error("Multiple Instruction Cache ways not yet implemented"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES) else $error("DTLB_ENTRIES must be a power of 2"); end endmodule + /* verilator lint_on STMTDLY */ /* verilator lint_on WIDTH */ diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index a2566181..7e688413 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,7 +27,7 @@ module testbench(); - parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*0459700; // # of instructions at which to turn on waves in graphical sim + parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*0900000; // # of instructions at which to turn on waves in graphical sim parameter stopICount = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can) /////////////////////////////////////////////////////////////////////////////// @@ -103,6 +103,7 @@ module testbench(); logic [99:0] StartCSRexpected[63:0]; string StartCSRname[99:0]; integer data_file_csr, scan_file_csr; + logic IllegalInstrFaultd; // ----------- // Error Macro @@ -153,21 +154,22 @@ module testbench(); clk <= 1; # 5; clk <= 0; # 5; end + // ------------------- + // Additional Hardware + // ------------------- + always @(posedge clk) + IllegalInstrFaultd = dut.hart.priv.IllegalInstrFaultM; + // ------------------------------------- // Special warnings for important faults // ------------------------------------- always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin - $display("!!!!!! illegal instruction !!!!!!!!!!"); - $display("(as a reminder, MCAUSE and MEPC are set by this)"); - $display("at %0t ps, PCM %x, instr %0d, dut.hart.lsu.dcache.MemPAdrM %x", $time, dut.hart.ifu.PCM, instrs, dut.hart.lsu.dcache.MemPAdrM); - `ERROR + // This is sometimes okay if the source code intentionally causes it. + $display("Warning: illegal instruction exception at %0t ps, InstrNum %0d, PCM %x, InstrM %s", $time, instrs, dut.hart.ifu.PCM, PCtextM); end if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 5 && instrs != 0) begin - $display("!!!!!! illegal (physical) memory access !!!!!!!!!!"); - $display("(as a reminder, MCAUSE and MEPC are set by this)"); - $display("at %0t ps, PCM %x, instr %0d, dut.hart.lsu.dcache.MemPAdrM %x", $time, dut.hart.ifu.PCM, instrs, dut.hart.lsu.dcache.MemPAdrM); - `ERROR + $display("Warning: illegal physical memory access exception at %0t ps, InstrNum %0d, PCM %x, InstrM %s", $time, instrs, dut.hart.ifu.PCM, PCtextM); end end @@ -185,8 +187,14 @@ module testbench(); // Hack to compensate for QEMU's incorrect MSTATUS end else if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,16) == "mstatus") begin force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'ha00000000; - end else - release dut.hart.ieu.dp.regf.wd3; + end else release dut.hart.ieu.dp.regf.wd3; + // Hack to compensate for QEMU's correct but different MTVAL (according to spec, storing the faulting instr is an optional feature) + if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,14) == "mtval") begin + force dut.hart.ieu.dp.WriteDataW = 0; + // Hack to compensate for QEMU's correct but different mhpmcounter's (these too are optional) + end else if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,20) == "mhpmcounter") begin + force dut.hart.ieu.dp.WriteDataW = 0; + end else release dut.hart.ieu.dp.WriteDataW; end end @@ -194,120 +202,95 @@ module testbench(); // Big Chunky Block // ---------------- always @(reset or dut.hart.ifu.InstrRawD or dut.hart.ifu.PCD) begin// or negedge dut.hart.ifu.StallE) begin // Why do we care about StallE? Everything seems to run fine without it. - if(~dut.hart.lsu.dcache.MemRWM) begin // *** Should this need to consider dut.hart.lsu.dcache.MemRWM? - #2; - // If PCD/InstrD aren't garbage - if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0) begin // && ~dut.hart.ifu.StallE) begin - // If Wally's PCD has updated - if (dut.hart.ifu.PCD !== lastPCD) begin - lastInstrDExpected = InstrDExpected; - lastPC <= dut.hart.ifu.PCD; - lastPC2 <= lastPC; - // If PCD isn't going to be flushed - if (~PCDwrong || lastPC == PCDexpected) begin + #2; + // If PCD/InstrD aren't garbage + if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0) begin // && ~dut.hart.ifu.StallE) begin + // If Wally's PCD has updated + if (dut.hart.ifu.PCD !== lastPCD) begin + lastInstrDExpected = InstrDExpected; + lastPC <= dut.hart.ifu.PCD; + lastPC2 <= lastPC; + // If PCD isn't going to be flushed + if (~PCDwrong || lastPC == PCDexpected) begin + // Stop if we've reached the end + if($feof(data_file_PCF)) begin + $display("no more PC data to read... CONGRATULATIONS!!!"); + `ERROR + end - // Stop if we've reached the end - if($feof(data_file_PCF)) begin - $display("no more PC data to read... CONGRATULATIONS!!!"); - `ERROR - end + // Increment PC + `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); - // Increment PC - `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); - `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); - - // NOP out certain instructions - if(dut.hart.ifu.PCD===PCDexpected) begin - if((dut.hart.ifu.PCD == 32'h80001dc6) || // for now, NOP out any stores to PLIC - (dut.hart.ifu.PCD == 32'h80001de0) || - (dut.hart.ifu.PCD == 32'h80001de2)) begin - $display("warning: NOPing out %s at PCD=%0x, instr %0d, time %0t", PCtextD, dut.hart.ifu.PCD, instrs, $time); - force InstrDExpected = 32'b0010011; - force dut.hart.ifu.InstrRawD = 32'b0010011; - while (clk != 0) #1; - while (clk != 1) #1; - release dut.hart.ifu.InstrRawD; - release InstrDExpected; - warningCount += 1; - forcedInstr = 1; - end else begin - forcedInstr = 0; - end - end - - // Increment instruction count - if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || - (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || - (instrs <= 100000 && instrs % 10000 == 0) || (instrs % 100000 == 0)) begin - $display("loaded %0d instructions", instrs); - end - instrs += 1; - - // Stop before bugs so "do" file can turn on waves - if (instrs == waveOnICount) begin - $display("turning on waves at %0d instructions", instrs); - $stop; - end else if (instrs == stopICount && stopICount != 0) begin - $display("Ending sim at %0d instructions (set stopICount to 0 to let the sim go on)", instrs); - $stop; - end - - // Check if PCD is going to be flushed due to a branch or jump - if (`BPRED_ENABLED) begin - PCDwrong = dut.hart.hzu.FlushD; //Old version: dut.hart.ifu.bpred.bpred.BPPredWrongE; <-- This old version failed to account for MRET. - end else begin - casex (lastInstrDExpected[31:0]) - 32'b00000000001000000000000001110011, // URET - 32'b00010000001000000000000001110011, // SRET - 32'b00110000001000000000000001110011, // MRET - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B - 32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ - 32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ - 32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J - PCDwrong = 1; - 32'bXXXXXXXXXXXXXXXX1001000000000010, // C.EBREAK: - 32'bXXXXXXXXXXXXXXXXX000XXXXX1110011: // Something that's not CSRR* - PCDwrong = 0; // tbh don't really know what should happen here - 32'b000110000000XXXXXXXXXXXXX1110011, // CSR* SATP, * - 32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR - 32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL - PCDwrong = 1; - default: - PCDwrong = 0; - endcase - end - - // Check PCD, InstrD - if (~PCDwrong && ~(dut.hart.ifu.PCD === PCDexpected)) begin - $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected); - `ERROR - end - InstrMask = InstrDExpected[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; - if ((~forcedInstr) && (~PCDwrong) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & InstrDExpected))) begin - $display("%0t ps, PCD %x, instr %0d: InstrD %x %s does not equal InstrDExpected %x %s", $time, dut.hart.ifu.PCD, instrs, dut.hart.ifu.InstrRawD, InstrDName, InstrDExpected, PCtextD); - `ERROR - end - - // Repeated instruction means QEMU had an interrupt which we need to spoof - if (PCFexpected == PCDexpected) begin - $display("Note at %0t ps, PCM %x %s, instr %0d: spoofing an interrupt", $time, dut.hart.ifu.PCM, PCtextM, instrs); - // Increment file pointers past the repeated instruction. - `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); - `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); - scan_file_memR = $fscanf(data_file_memR, "%x\n", readAdrExpected); - scan_file_memR = $fscanf(data_file_memR, "%x\n", readDataExpected); - // Next force a timer interrupt (*** this may later need generalizing) - force dut.uncore.genblk1.clint.MTIME = dut.uncore.genblk1.clint.MTIMECMP + 1; + // NOP out certain instructions + if(dut.hart.ifu.PCD===PCDexpected) begin + if((dut.hart.ifu.PCD == 32'h80001dc6) || // for now, NOP out any stores to PLIC + (dut.hart.ifu.PCD == 32'h80001de0) || + (dut.hart.ifu.PCD == 32'h80001de2)) begin + $display("warning: NOPing out %s at PCD=%0x, instr %0d, time %0t", PCtextD, dut.hart.ifu.PCD, instrs, $time); + force InstrDExpected = 32'b0010011; + force dut.hart.ifu.InstrRawD = 32'b0010011; while (clk != 0) #1; - while (clk != 1) #1; - release dut.uncore.genblk1.clint.MTIME; + while (clk != 1) #1; + release dut.hart.ifu.InstrRawD; + release InstrDExpected; + warningCount += 1; + forcedInstr = 1; + end else begin + forcedInstr = 0; end end + + // Increment instruction count + if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || + (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || + (instrs <= 100000 && instrs % 10000 == 0) || (instrs % 100000 == 0)) begin + $display("loaded %0d instructions", instrs); + end + instrs += 1; + + // Stop before bugs so "do" file can turn on waves + if (instrs == waveOnICount) begin + $display("turning on waves at %0d instructions", instrs); + $stop; + end else if (instrs == stopICount && stopICount != 0) begin + $display("Ending sim at %0d instructions (set stopICount to 0 to let the sim go on)", instrs); + $stop; + end + + // Check if PCD is going to be flushed due to a branch or jump + if (`BPRED_ENABLED) begin + PCDwrong = dut.hart.hzu.FlushD || (PCtextE.substr(0,3) == "mret"); //Old version: dut.hart.ifu.bpred.bpred.BPPredWrongE; <-- This old version failed to account for MRET. + end + + // Check PCD, InstrD + if (~PCDwrong && ~(dut.hart.ifu.PCD === PCDexpected)) begin + $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected); + `ERROR + end + InstrMask = InstrDExpected[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; + if ((~forcedInstr) && (~PCDwrong) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & InstrDExpected))) begin + $display("%0t ps, PCD %x, instr %0d: InstrD %x %s does not equal InstrDExpected %x %s", $time, dut.hart.ifu.PCD, instrs, dut.hart.ifu.InstrRawD, InstrDName, InstrDExpected, PCtextD); + `ERROR + end + + // Repeated instruction means QEMU had an interrupt which we need to spoof + if (PCFexpected == PCDexpected) begin + $display("Note at %0t ps, PCM %x %s, instr %0d: spoofing an interrupt", $time, dut.hart.ifu.PCM, PCtextM, instrs); + // Increment file pointers past the repeated instruction. + `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); + scan_file_memR = $fscanf(data_file_memR, "%x\n", readAdrExpected); + scan_file_memR = $fscanf(data_file_memR, "%x\n", readDataExpected); + // Next force a timer interrupt (*** this may later need generalizing) + force dut.uncore.genblk1.clint.MTIME = dut.uncore.genblk1.clint.MTIMECMP + 1; + while (clk != 0) #1; + while (clk != 1) #1; + release dut.uncore.genblk1.clint.MTIME; + end end - lastPCD = dut.hart.ifu.PCD; end + lastPCD = dut.hart.ifu.PCD; end end @@ -360,9 +343,8 @@ module testbench(); end `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); `SCAN_PC(data_file_PCW, scan_file_PCW, trashString, trashString, InstrWExpected, PCWexpected); - // If repeated instr + // If repeated or instruction, we want to skip over it (indicates an interrupt) if (PCMexpected == PCWexpected) begin - // Increment file pointers past the repeated instruction. `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); `SCAN_PC(data_file_PCW, scan_file_PCW, trashString, trashString, InstrWExpected, PCWexpected); end @@ -371,6 +353,11 @@ module testbench(); `ERROR end end + // Skip over faulting instructions because they do not make it to the W stage. + if (IllegalInstrFaultd) begin + `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); + `SCAN_PC(data_file_PCW, scan_file_PCW, trashString, trashString, InstrWExpected, PCWexpected); + end end @@ -453,8 +440,7 @@ module testbench(); // Read Checker // ------------ always @(negedge clk) begin - //if (dut.hart.MemRWM[1] && ~dut.hart.StallM && ~dut.hart.FlushM && dut.hart.ieu.InstrValidM) begin <-- This doesn't work because ReadDataM can be used for other things (namely page table walking) while the pipeline is stalled, leaving it in a different state when the pipeline unstalls - if (dut.hart.MemRWM[1] && dut.hart.lsu.dcache.ReadDataWEn) begin // <-- ReadDataWEn is a good indicator that the pipeline is using the current contents of ReadDataM + if (dut.hart.MemRWM[1] && ~dut.hart.StallM && ~dut.hart.FlushM && dut.hart.ieu.InstrValidM) begin if($feof(data_file_memR)) begin $display("no more memR data to read"); `ERROR @@ -530,45 +516,61 @@ module testbench(); // -------------- // Checker Macros // -------------- - string MSTATUSstring = "MSTATUS"; // string variables seem to compare more reliably than string literals (they gave me a lot of hassle), but *** there's probably a better way to do this + // String variables seem to compare more reliably than string literals (they gave me a lot of hassle), + // but *** there's probably a better way to do this. + // You can't just use the "__name" variables though because you need to declare variables before using them. + string MSTATUSstring = "MSTATUS"; + string MIPstring = "MIP"; + string MEPCstring = "MEPC"; + string MCAUSEstring = "MCAUSE"; + string MTVALstring = "MTVAL"; string SEPCstring = "SEPC"; string SCAUSEstring = "SCAUSE"; string SSTATUSstring = "SSTATUS"; + + logic [63:0] expectedCSR; + string expectedCSRname; `define CHECK_CSR2(CSR, PATH) \ - logic [63:0] expected``CSR``; \ - string CSR; \ string ``CSR``name = `"CSR`"; \ - string expected``CSR``name; \ always @(``PATH``.``CSR``_REGW) begin \ - if ($time > 1 && (`BUILDROOT != 1 || ``CSR``name != SSTATUSstring)) begin \ - // This is some feeble hackery designed to control the order in which CSRs are checked \ - // when multiple change at the same time. \ - if (``CSR``name == SEPCstring) #1; \ - if (``CSR``name == SCAUSEstring) #2; \ - if (``CSR``name == SSTATUSstring) #3; \ - scan_file_csr = $fscanf(data_file_csr, "%s\n", expected``CSR``name); \ - scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ - if(expected``CSR``name.icompare(``CSR``name)) begin \ - $display("%0t ps, PCM %x %s, instr %0d: %s changed, expected %s", $time, dut.hart.ifu.PCM, PCtextM, instrs, `"CSR`", expected``CSR``name); \ + if (instrs == 0 && ~reset) begin \ + for(integer j=0; j